Author

Dylan Li

Libraries

Code
library(tidyverse)
library(tidymodels)
library(discrim)
library(rpart)
library(rpart.plot)
library(baguette)
library(tidyclust)
library(caret)
library(RColorBrewer)
library(Polychrome)
library(forcats)

Grid Setup

Code
sample_grid <- matrix(c("Bear", "Bee", "Meadow", "Bear", "Meadow", "Meadow", "Bee", "Meadow", "Bee"),3,3,byrow=TRUE)
sample_grid
     [,1]   [,2]     [,3]    
[1,] "Bear" "Bee"    "Meadow"
[2,] "Bear" "Meadow" "Meadow"
[3,] "Bee"  "Meadow" "Bee"   
Code
sample_grid2 <- matrix(c("Meadow", "Meadow", "Bee", "Meadow", "Bee", "Meadow", "Bee", "Meadow", "Meadow"),3,3,byrow=TRUE)
sample_grid2
     [,1]     [,2]     [,3]    
[1,] "Meadow" "Meadow" "Bee"   
[2,] "Meadow" "Bee"    "Meadow"
[3,] "Bee"    "Meadow" "Meadow"
Code
big_grid1 <- matrix(c("Deer", "Meadow", "Bee", "Bear", "Fox", "Wolf", "Meadow", "Meadow", "Trout", "Stream",
                      "Deer", "Eagle", "Meadow", "Trout", "Stream", "Fox", "Rabbit", "Stream", "Dragonfly",
                      "Stream"),4,5,byrow=TRUE)
big_grid1
     [,1]   [,2]     [,3]     [,4]        [,5]    
[1,] "Deer" "Meadow" "Bee"    "Bear"      "Fox"   
[2,] "Wolf" "Meadow" "Meadow" "Trout"     "Stream"
[3,] "Deer" "Eagle"  "Meadow" "Trout"     "Stream"
[4,] "Fox"  "Rabbit" "Stream" "Dragonfly" "Stream"
Code
twenty_seven_x <- rep("x", 27)

super_vec <- append(twenty_seven_x, 
                      c("x", "x", 
                        "Deer", "Meadow", "Bee", "Bear", "Fox", 
                        "x", "x", 
                        "x", "x", 
                        "Wolf", "Meadow", "Meadow", "Trout", "Stream",
                        "x", "x", 
                        "x", "x", 
                        "Deer", "Eagle", "Meadow", "Trout", "Stream", 
                        "x", "x", 
                        "x", "x", 
                        "Fox", "Rabbit", "Stream", "Dragonfly","Stream",
                        "x", "x"))

super_grid1 <- matrix(super_vec,7,9,byrow=TRUE)
super_grid1
     [,1] [,2] [,3]   [,4]     [,5]     [,6]        [,7]     [,8] [,9]
[1,] "x"  "x"  "x"    "x"      "x"      "x"         "x"      "x"  "x" 
[2,] "x"  "x"  "x"    "x"      "x"      "x"         "x"      "x"  "x" 
[3,] "x"  "x"  "x"    "x"      "x"      "x"         "x"      "x"  "x" 
[4,] "x"  "x"  "Deer" "Meadow" "Bee"    "Bear"      "Fox"    "x"  "x" 
[5,] "x"  "x"  "Wolf" "Meadow" "Meadow" "Trout"     "Stream" "x"  "x" 
[6,] "x"  "x"  "Deer" "Eagle"  "Meadow" "Trout"     "Stream" "x"  "x" 
[7,] "x"  "x"  "Fox"  "Rabbit" "Stream" "Dragonfly" "Stream" "x"  "x" 

Board Generation

Code
cards <- c(rep("Bear", 12), 
           rep("Bee", 8), 
           rep("Meadow", 20),
           rep("Trout", 10),
           rep("Eagle", 8),
           rep("Rabbit", 8),
           rep("Dragonfly", 8),
           rep("Fox", 12),
           rep("Deer", 12),
           rep("Stream", 20),
           rep("Wolf", 12)
           )

generate_grid <- function(pool, partial_grid = NULL){
  if(is.null(partial_grid) == FALSE){
    blanks <- which(partial_grid == "x", TRUE)
    n = 20 - nrow(blanks)
    
  }else{
    n = 20
  }
  
  sample <- sample(pool, n)
  
  if(is.null(partial_grid) == FALSE){
    
    board <- partial_grid
    
    for (i in 1:nrow(blanks)){
      loc <- c(blanks[[i, 1]], blanks[[i, 2]])
      board[blanks[[i, 1]], blanks[[i, 2]]] = sample[i]
    }
    
  }else{
    board <- matrix(sample, nrow=4, ncol=5, byrow=TRUE)
  }
  
  return(board)
}

generate_grid(cards)
     [,1]     [,2]     [,3]     [,4]     [,5]       
[1,] "Eagle"  "Stream" "Meadow" "Trout"  "Dragonfly"
[2,] "Trout"  "Trout"  "Eagle"  "Meadow" "Bear"     
[3,] "Stream" "Wolf"   "Meadow" "Stream" "Trout"    
[4,] "Meadow" "Fox"    "Fox"    "Meadow" "Deer"     

Helper functions

Code
find_cardinals <- function(i, j, grid){
  cardinals <- list()
  maxrow = nrow(grid)
  maxcol = ncol(grid)
      
  if(i+1 <= maxrow){
    cardinals[[length(cardinals)+1]] <- c(i+1,j)
  }
  
  if(i-1 > 0){
    cardinals[[length(cardinals)+1]] <- c(i-1,j)
  }
  
  if(j+1 <= maxcol){
    cardinals[[length(cardinals)+1]] <- c(i,j+1)
  }
  
  if(j-1 > 0){
    cardinals[[length(cardinals)+1]] <- c(i,j-1)
  }
  
  return(cardinals)
}
Code
find_two_spaces <- function(i, j, grid){
  two_space <- list()
  maxrow = nrow(grid)
  maxcol = ncol(grid)
      
  if(i+1 <= maxrow){
    two_space[[length(two_space)+1]] <- c(i+1,j)
  }
  
  if(i+2 <= maxrow){
    two_space[[length(two_space)+1]] <- c(i+2,j)
  }
  
  if(i-1 > 0){
    two_space[[length(two_space)+1]] <- c(i-1,j)
  }
  
  if(i-2 > 0){
    two_space[[length(two_space)+1]] <- c(i-2,j)
  }
  
  if(j+1 <= maxcol){
    two_space[[length(two_space)+1]] <- c(i,j+1)
  }
  
  if(j+2 <= maxcol){
    two_space[[length(two_space)+1]] <- c(i,j+2)
  }
  
  if(j-1 > 0){
    two_space[[length(two_space)+1]] <- c(i,j-1)
  }
  
  if(j-2 > 0){
    two_space[[length(two_space)+1]] <- c(i,j-2)
  }
  
  if(i+1 <= maxrow && j+1 <= maxcol){
    two_space[[length(two_space)+1]] <- c(i+1,j+1)
  }
  
  if(i+1 <= maxrow && j-1 > 0){
    two_space[[length(two_space)+1]] <- c(i+1,j-1)
  }
  
  if(i-1 > 0 && j+1 <= maxcol){
    two_space[[length(two_space)+1]] <- c(i-1,j+1)
  }
  
  if(i-1 > 0 && j-1 > 0){
    two_space[[length(two_space)+1]] <- c(i-1,j-1)
  }
  
  return(two_space)
}
Code
find_more_meadows <- function(i, j, grid, meadow_list){
  meadow_list[[length(meadow_list)+1]] = as.double(c(i,j))
  current_caridnals <- find_cardinals(i, j, grid)
  for (k in current_caridnals){
    if (grid[k[1],k[2]] == "Meadow"){
      if ((list(k) %in% meadow_list) == FALSE){
        meadow_list = find_more_meadows(as.double(k[1]),as.double(k[2]), grid, meadow_list)
      }
    }
  }
  return(meadow_list)
  
}
Code
find_more_streams <- function(i, j, grid, stream_list){
  stream_list[[length(stream_list)+1]] = as.double(c(i,j))
  current_caridnals <- find_cardinals(i, j, grid)
  for (k in current_caridnals){
    if (grid[k[1],k[2]] == "Stream"){
      if ((list(k) %in% stream_list) == FALSE){
        stream_list = find_more_streams(as.double(k[1]),as.double(k[2]), grid, stream_list)
      }
    }
  }
  return(stream_list)
  
}

Scoring function

Code
score_grid <- function(grid, individual=FALSE){
  
  score = 0
  
  meadow_patch = list()
  first_meadow = TRUE
  stream_patch = list()
  first_stream = TRUE
  dragonfly_list = list()
  deer_row <- c()
  deer_col <- c()
  # first_wolf = TRUE
  num_wolves = 0
  
  
  bear_score = 0
  bee_score = 0
  meadow_score = 0
  trout_score = 0
  eagle_score = 0
  rabbit_score = 0
  dragonfly_score = 0
  fox_score = 0
  deer_score = 0
  stream_score = 0
  wolf_score = 0
  diversity_score = 0
  
  for (i in 1:nrow(grid)) {
    for (j in 1:ncol(grid)) {
      
      current_caridnals <- find_cardinals(i, j, grid)
      
      
      if(grid[i,j] == "Bear"){
        for (k in current_caridnals){
          if (grid[k[1],k[2]] == "Bee" || grid[k[1],k[2]] == "Trout"){
            score = score + 2
            bear_score = bear_score + 2
          }
        }
      }
      
      if(grid[i,j] == "Bee"){
        for (k in current_caridnals){
          if (grid[k[1],k[2]] == "Meadow"){
            score = score + 3
            bee_score = bee_score + 3
          }
        }
      }
      
      if(grid[i,j] == "Meadow"){
        if (first_meadow == TRUE){
          
          first_meadow = FALSE
          first_patch = list()
          completed_patch = find_more_meadows(as.double(i), as.double(j), grid, first_patch)
          meadow_patch[[length(meadow_patch)+1]] = completed_patch
          
        }else{
          
          exist = FALSE
          
          for (x in 1:length(meadow_patch)){
            if (list(as.double(c(i,j))) %in% meadow_patch[[x]]){
              exist = TRUE
            }
          }
          
          if (exist == FALSE){
            new_patch = list()
            completed_patch = find_more_meadows(as.double(i), as.double(j), grid, new_patch)
            meadow_patch[[length(meadow_patch)+1]] = completed_patch
          }
          
        }
      }
      
      if(grid[i,j] == "Trout"){
        for (k in current_caridnals){
          if (grid[k[1],k[2]] == "Dragonfly" || grid[k[1],k[2]] == "Stream"){
            score = score + 2
            trout_score = trout_score + 2
          }
        }
      }
      
      if(grid[i,j] == "Eagle"){
        
        two_space <- find_two_spaces(i, j, grid)
        
        for (k in two_space){
          if (grid[k[1],k[2]] == "Trout" || grid[k[1],k[2]] == "Rabbit"){
            score = score + 2
            eagle_score = eagle_score + 2
          }
        }
      }
      
      if(grid[i,j] == "Rabbit"){
        score = score + 1
        rabbit_score = rabbit_score + 1
      }
      
      if(grid[i,j] == "Dragonfly"){
        dragonfly_list[[length(dragonfly_list)+1]] = as.double(c(i,j))
      }
      
      if(grid[i,j] == "Fox"){
        score_it = TRUE
        
        for (k in current_caridnals){
          if (grid[k[1],k[2]] == "Bear" || grid[k[1],k[2]] == "Wolf"){
            score_it = FALSE
          }
        }
        
        if (score_it){
          score = score + 3
          fox_score = fox_score + 3
        }
      }
      
      if(grid[i,j] == "Deer"){
        deer_row <- append(deer_row, i)
        deer_col <- append(deer_col, j)
      }
      
      if(grid[i,j] == "Stream"){
        if (first_stream == TRUE){
          
          first_stream = FALSE
          first_patch = list()
          completed_patch = find_more_streams(as.double(i), as.double(j), grid, first_patch)
          stream_patch[[length(stream_patch)+1]] = completed_patch
          
        }else{
          
          exist = FALSE
          
          for (x in 1:length(stream_patch)){
            if (list(as.double(c(i,j))) %in% stream_patch[[x]]){
              exist = TRUE
            }
          }
          
          if (exist == FALSE){
            new_patch = list()
            completed_patch = find_more_streams(as.double(i), as.double(j), grid, new_patch)
            stream_patch[[length(stream_patch)+1]] = completed_patch
          }
          
        }
      }
      
      if(grid[i,j] == "Wolf"){
        # temporary stand in, can only be scored properly with more than 1 player
        # if (first_wolf == TRUE){
        #   score = score + 8
        #   wolf_score = wolf_score + 8
        #   first_wolf == FALSE
        # }
        
        num_wolves = num_wolves + 1
      }
      
    }
  }
  
  for (i in meadow_patch){
    if (length(i) == 2){
      score = score + 3
      meadow_score = meadow_score + 3
    }else if (length(i) == 3){
      score = score + 6
      meadow_score = meadow_score + 6
    }else if (length(i) == 4){
      score = score + 10
      meadow_score = meadow_score + 10
    }else if (length(i) >= 5){
      score = score + 15
      meadow_score = meadow_score + 15
    }
  }
  
  largest_stream = 0
  
  for (i in stream_patch){
    if (length(i) > largest_stream){
      largest_stream = length(i)
    }
  }
  
  # temporary scoring for largest stream, can only be scored properly with more than 1 player
  # if (largest_stream > 0){
  #   score = score + 5
  #   stream_score = stream_score + 5
  # }
  
  for (d in dragonfly_list){
    current_caridnals <- find_cardinals(d[1], d[2], grid)
    
    largest_score = 0
    
    for (k in current_caridnals){
      if (grid[k[1],k[2]] == "Stream"){
        for (s in stream_patch){
          if((list(k) %in% s) == TRUE){
            # OLD scoring below, incorrect
            # current_score = 2 * length(s)
            current_score = length(s) # NEW scoring, corrected
            if (current_score > largest_score){
              largest_score = current_score
            }
          }
        }
      }
    }
    
    score = score + largest_score
    dragonfly_score = dragonfly_score + largest_score
  }
  
  score = score + 2*length(unique(deer_row))
  score = score + 2*length(unique(deer_col))
  
  deer_score = deer_score + 2*length(unique(deer_row))
  deer_score = deer_score + 2*length(unique(deer_col))
  
  diversity_matrix <- matrix(c(bear_score, bee_score, meadow_score, trout_score, eagle_score, rabbit_score,
           dragonfly_score, fox_score, deer_score))
  
  diversity_vector <- c(bear_score, bee_score, meadow_score, trout_score, eagle_score, rabbit_score,
           dragonfly_score, fox_score, deer_score)

  gaps = colSums(diversity_matrix == 0)[1]
   
  # if(gaps >= 6){
  #   score = score - 5
  #   diversity_score = -5
  # }else if (gaps == 4){
  #   score = score + 3
  #   diversity_score = 3
  # }else if (gaps == 3){
  #   score = score + 7
  #   diversity_score = 7
  # }else if (gaps <= 2){
  #   score = score + 12
  #   diversity_score = 12
  # }
  if(individual==TRUE){
    if(largest_stream == 0){
      gaps = gaps + 1
    }
    if(num_wolves == 0){
      gaps = gaps + 1
    }
  
    if(gaps >= 6){
        dv_score = -5
      }else if (gaps == 4){
        dv_score = 3
      }else if (gaps == 3){
        dv_score = 7
      }else if (gaps <= 2){
        dv_score = 12
      }else{
        dv_score = 0
      }
    
    return(c(diversity_vector,largest_stream, num_wolves, dv_score))
  }else{
    return(c(score, largest_stream, num_wolves, gaps))
  }
  
}

Solo play scoring

Code
solo_score <- function(score_vector){
  score = score_vector[1]
  stream_size = score_vector[2]
  num_wolves = score_vector[3]
  num_gaps = score_vector[4]
  
  score = score + stream_size + num_wolves
  
  if(stream_size == 0){
    num_gaps = num_gaps + 1
  }
  if(num_wolves == 0){
    num_gaps = num_gaps + 1
  }
  
  if(num_gaps >= 6){
      score = score - 5
    }else if (num_gaps == 4){
      score = score + 3
    }else if (num_gaps == 3){
      score = score + 7
    }else if (num_gaps <= 2){
      score = score + 12
    }
  
  return(score)
}

Multiplayer Scoring

Code
mp_score <- function(score_list){
  
  # each entry in score_list follows the format: c(score, size_of_largest_stream, num_wolves, diversity_gaps)
  
  if(length(score_list) > 2){
    more_than_2 = TRUE
  }else{
    more_than_2 = FALSE
  }
  
  score <- rep(0, length(score_list))
  stream_size <- rep(0, length(score_list))
  num_wolves <- rep(0, length(score_list))
  num_gaps <- rep(0, length(score_list))
  
  for (i in 1:length(score_list)){
    
    score[i] = score_list[[i]][1]
    
    stream_size[i] = score_list[[i]][2]
    
    num_wolves[i] = score_list[[i]][3]
    
    num_gaps[i] = score_list[[i]][4] + 2
    
  }
  
  print(score)
  
  stream_size = sort(stream_size, decreasing = TRUE)
  num_wolves = sort(num_wolves, decreasing = TRUE)
  
  stream_matrix = matrix(stream_size)
  wolf_matrix = matrix(num_wolves)
  
  largest_stream = stream_size[1]
  second_stream = stream_size[2]
  
  score_largest_s = TRUE
  score_second_s = TRUE
  
  if(colSums(stream_matrix == largest_stream)[1] > 1){
    score_second_s = FALSE
  }
  
  for (i in 1:length(score_list)){
      if (score_list[[i]][2] == largest_stream && largest_stream != 0){
        score[i] = score[i] + 8
        num_gaps[i] = num_gaps[i] - 1
      }
  }
  
  if(score_second_s == TRUE){
    for (i in 1:length(score_list)){
      if (score_list[[i]][2] == second_stream && second_stream != 0){
        score[i] = score[i] + 5
        num_gaps[i] = num_gaps[i] - 1
      }
    }
  }
  
  print(score)
  
  most_wolves = num_wolves[1]
  second_wolves = num_wolves[2]
  if(more_than_2){
    third_wolves = num_wolves[3]
  }
  
  score_most_w = TRUE
  score_second_w = TRUE
  score_third_w = TRUE
  
  if(colSums(wolf_matrix == most_wolves)[1] > 1){
    score_second_w = FALSE
    if(colSums(wolf_matrix == most_wolves)[1] > 2){
      score_third_w = FALSE
    }
  }
  
  for (i in 1:length(score_list)){
      if (score_list[[i]][3] == most_wolves){
        score[i] = score[i] + 12
        num_gaps[i] = num_gaps[i] - 1
      }
  }
  
  if(colSums(wolf_matrix == second_wolves)[1] > 1){
    score_third_w = FALSE
  }
  
  if(score_second_w == TRUE){
    for (i in 1:length(score_list)){
      if (score_list[[i]][3] == second_wolves){
        score[i] = score[i] + 8
        num_gaps[i] = num_gaps[i] - 1
      }
    }
  }
  
  if(score_third_w == TRUE && more_than_2 == TRUE){
    for (i in 1:length(score_list)){
      if (score_list[[i]][3] == third_wolves){
        score[i] = score[i] + 4
        num_gaps[i] = num_gaps[i] - 1
      }
    }
  }
  
  print(score)
  
  for(i in 1:length(score_list)){
    if(num_gaps[i] >= 6){
      score[i] = score[i] - 5
    }else if (num_gaps[i] == 4){
      score[i] = score[i] + 3
    }else if (num_gaps[i] == 3){
      score[i] = score[i] + 7
    }else if (num_gaps[i] <= 2){
      score[i] = score[i] + 12
    }
  }
  
  
  
  return(score)
  
}

Baseline function

Code
baseline_sim <- function(cards, n = 10000){
  all_scores <- c()
  for (i in 1:n){
    sim_grid <- generate_grid(cards)
    all_scores <- c(all_scores, solo_score(score_grid(sim_grid)))
  }
  return(all_scores)
}

Random Walk MCMC

Code
rw_mcmc <- function(grid, iterations = 1000, acceptance_func = "simple", beta = 0, bp = 500, original = NULL, record_board = FALSE){
  start_score <- solo_score(score_grid(grid))
  start_grid <- grid
  current_grid <- grid
  continue <- TRUE
  rows <- rep(1:nrow(grid))
  cols <- rep(1:ncol(grid))
  iter = 0
  score_vector <- c()
  highest_score <- start_score
  highest_grid <- grid
  highest_iter <- 0
  
  while(continue){
    current_score <- solo_score(score_grid(current_grid))
    if(bp == 0){
    }else if(iter%%bp == 0){
      if(acceptance_func == "annealing dynamic" || 
         acceptance_func == "delayed"){
        current_grid = highest_grid
        current_score = highest_score
      }
    }
    score_vector = c(score_vector, current_score)
    if(current_score > highest_score){
      highest_score = current_score
      highest_grid = current_grid
      highest_iter = iter
    }
    
    # Choosing swap locations if grid is partially complete
    if(is.null(original) == FALSE){
      blanks <- which(original == "x", TRUE)
      possible <- list()
      
      for (i in 1:nrow(blanks)){
        loc <- c(blanks[[i, 1]], blanks[[i, 2]])
        possible[[length(possible)+1]] = loc
      }
      
      start_loc <- sample(possible, 1)
      start_loc <- start_loc[[1]]
      start_row <- start_loc[1]
      start_col <- start_loc[2]
      
      end_loc <- sample(possible, 1)
      end_loc <- end_loc[[1]]
      end_row <- end_loc[1]
      end_col <- end_loc[2]
      
      while(current_grid[start_row, start_col] == current_grid[end_row, end_col]){
        end_loc <- sample(possible, 1)
        end_loc <- end_loc[[1]]
        end_row <- end_loc[1]
        end_col <- end_loc[2]
      }
    
    # Choosing swap locations without partial grid  
    }else{
      
      start_row <- sample(rows, 1)
      start_col <- sample(cols, 1)
      
      # Random swap anywhere
      end_row <- sample(rows, 1)
      end_col <- sample(cols, 1)
  
      while(current_grid[start_row, start_col] == current_grid[end_row, end_col]){
        end_row <- sample(rows, 1)
        end_col <- sample(cols, 1)
      }
    }
    # Adjacent swap only
    # possible_end <- find_cardinals(start_row, start_col, grid)
    # end <- sample(possible_end, 1)
    # end_row <- end[[1]][1]
    # end_col <- end[[1]][2]
    
    proposed_grid <- current_grid
    
    # if(iter == 347 || iter == 668){
    #   print(proposed_grid)
    # }
    
    start <- current_grid[start_row, start_col]
    end <- current_grid[end_row, end_col]
    
    proposed_grid[start_row, start_col] = end
    proposed_grid[end_row, end_col] = start
    
    proposed_score <- solo_score(score_grid(proposed_grid))
    
    if(acceptance_func == "simple"){
      p = proposed_score
      c = current_score
      if(p <= 0){
        p = p + -1*p + 1
        c = c + -1*p + 1
      }
      if(c <= 0){
        c = c + -1*c + 1
        p = p + -1*c + 1
      }
      
      x <- runif(1,0,1)
      if(x < p/c){
        current_grid <- proposed_grid
      }
    }
    
    if(acceptance_func == "annealing"){
      p = proposed_score
      c = current_score
      
      x <- runif(1,0,1)
      if(x < exp(beta*p)/exp(beta*c)){
        current_grid <- proposed_grid
      }
    }
    
    if(acceptance_func == "annealing dynamic"){
      p = proposed_score
      c = current_score
      
      factor = floor(iter/bp) + 1
      b = beta*(factor)
      
      x <- runif(1,0,1)
      if(x < exp(b*p)/exp(b*c)){
        current_grid <- proposed_grid
      }
    }
    
    if(acceptance_func == "delayed"){
      p = proposed_score
      c = current_score
      if(p <= 0){
        p = p + -1*p + 1
        c = c + -1*p + 1
      }
      if(c <= 0){
        c = c + -1*c + 1
        p = p + -1*c + 1
      }
      
      x <- runif(1,0,1)
      if(x < p/c && p/c < 1){
        p2 = proposed_score
        c2 = current_score
        
        factor = floor(iter/500) + 1
        b = beta*(factor)
        
        x <- runif(1,0,1)
        if(x < (exp(b*p2)*c)/(exp(b*c2)*p)){
          current_grid <- proposed_grid
        }
      }else if(p/c >= 1){
        current_grid <- proposed_grid
      }
    }
    
    iter = iter + 1
      if(iter >= iterations){
        continue = FALSE
      }
  }
  
  final_score <- solo_score(score_grid(current_grid))
  if(final_score > highest_score){
      highest_score = final_score
      highest_grid = grid
    }
  
  if(record_board == TRUE){
    return(c(t(highest_grid), highest_score, t(start_grid), start_score))
  }else{
    return(c(highest_score, final_score, start_score, highest_iter, data.frame(score_vector)))
  }
}
Code
multi_mcmc <- function(iterations, n, acceptance_func = "simple", beta = 0, bp = 500, grid = NULL, boardlist = NULL, record_board = FALSE, cards = NULL, card_name = NULL){
  first = TRUE
  start_scores <- c()
  highest_scores <- c()
  highest_iter <- c()
  if(is.null(cards) == TRUE){
    cards <- c(rep("Bear", 12), 
           rep("Bee", 8), 
           rep("Meadow", 20),
           rep("Trout", 10),
           rep("Eagle", 8),
           rep("Rabbit", 8),
           rep("Dragonfly", 8),
           rep("Fox", 12),
           rep("Deer", 12),
           rep("Stream", 20),
           rep("Wolf", 12)
           )
    if(is.null(card_name) == TRUE){
      card_name = "default"
    }
  }
  
  
  # Creating proper pool of cards if grid is partially complete
  if(is.null(grid) == FALSE){
    df <- as.data.frame(table(grid))
    animals <- levels(df$grid)
    for(i in 1:nrow(grid)){
      animal <- animals[i]
      num = length(cards[cards == animal])
      cards = cards[!cards == animal]
      cards <- c(cards, rep(animal, num - df[i,2]))
    }
  }
  
  for(i in 1:n){
    
    if(is.null(grid) == FALSE){
      
      if(record_board == TRUE){
        sim_grid <- generate_grid(cards, grid)
        run <- rw_mcmc(sim_grid, iterations, acceptance_func, beta, bp, grid, record_board = TRUE)
      }else{
        sim_grid <- generate_grid(cards, grid)
        run <- rw_mcmc(sim_grid, iterations, acceptance_func, beta, bp, grid)
      }
      
    }else if(is.null(boardlist) == FALSE){
      
      if(record_board == TRUE){
        run <- rw_mcmc(boardlist[[i]], iterations, acceptance_func, beta, bp, record_board = TRUE)
      }else{
        run <- rw_mcmc(boardlist[[i]], iterations, acceptance_func, beta, bp)
      }
      
    }else{
      if(record_board == TRUE){
        sim_grid <- generate_grid(cards)
        run <- rw_mcmc(sim_grid, iterations, acceptance_func, beta, bp, record_board = TRUE)
      }else{
        sim_grid <- generate_grid(cards)
        run <- rw_mcmc(sim_grid, iterations, acceptance_func, beta, bp)
      }
    }
    
    if(record_board == TRUE){
      if(first == TRUE){
        first = FALSE
        
        df <- data.frame(
          row1col1 = run[[1]],
          row1col2 = run[[2]],
          row1col3 = run[[3]],
          row1col4 = run[[4]],
          row1col5 = run[[5]],
          row2col1 = run[[6]],
          row2col2 = run[[7]],
          row2col3 = run[[8]],
          row2col4 = run[[9]],
          row2col5 = run[[10]],
          row3col1 = run[[11]],
          row3col2 = run[[12]],
          row3col3 = run[[13]],
          row3col4 = run[[14]],
          row3col5 = run[[15]],
          row4col1 = run[[16]],
          row4col2 = run[[17]],
          row4col3 = run[[18]],
          row4col4 = run[[19]],
          row4col5 = run[[20]],
          score = run[[21]],
          pool = card_name,
          start_score = run[[42]],
          row1col1_s = run[[22]],
          row1col2_s = run[[23]],
          row1col3_s = run[[24]],
          row1col4_s = run[[25]],
          row1col5_s = run[[26]],
          row2col1_s = run[[27]],
          row2col2_s = run[[28]],
          row2col3_s = run[[29]],
          row2col4_s = run[[30]],
          row2col5_s = run[[31]],
          row3col1_s = run[[32]],
          row3col2_s = run[[33]],
          row3col3_s = run[[34]],
          row3col4_s = run[[35]],
          row3col5_s = run[[36]],
          row4col1_s = run[[37]],
          row4col2_s = run[[38]],
          row4col3_s = run[[39]],
          row4col4_s = run[[40]],
          row4col5_s = run[[41]]
        )
        
      }else{
        row <- data.frame(
          row1col1 = run[[1]],
          row1col2 = run[[2]],
          row1col3 = run[[3]],
          row1col4 = run[[4]],
          row1col5 = run[[5]],
          row2col1 = run[[6]],
          row2col2 = run[[7]],
          row2col3 = run[[8]],
          row2col4 = run[[9]],
          row2col5 = run[[10]],
          row3col1 = run[[11]],
          row3col2 = run[[12]],
          row3col3 = run[[13]],
          row3col4 = run[[14]],
          row3col5 = run[[15]],
          row4col1 = run[[16]],
          row4col2 = run[[17]],
          row4col3 = run[[18]],
          row4col4 = run[[19]],
          row4col5 = run[[20]],
          score = run[[21]],
          pool = card_name,
          start_score = run[[42]],
          row1col1_s = run[[22]],
          row1col2_s = run[[23]],
          row1col3_s = run[[24]],
          row1col4_s = run[[25]],
          row1col5_s = run[[26]],
          row2col1_s = run[[27]],
          row2col2_s = run[[28]],
          row2col3_s = run[[29]],
          row2col4_s = run[[30]],
          row2col5_s = run[[31]],
          row3col1_s = run[[32]],
          row3col2_s = run[[33]],
          row3col3_s = run[[34]],
          row3col4_s = run[[35]],
          row3col5_s = run[[36]],
          row4col1_s = run[[37]],
          row4col2_s = run[[38]],
          row4col3_s = run[[39]],
          row4col4_s = run[[40]],
          row4col5_s = run[[41]]
        )
        df <- rbind(df, row) 
      }
    }else{
      start_scores <- c(start_scores, run[[3]])
      highest_scores <- c(highest_scores, run[[1]])
      highest_iter <- c(highest_iter, run[[4]])
    }
    
  }
  
  if(record_board == TRUE){
    return(df)
  }else{
    return(list(start_scores, highest_scores, highest_iter))
  }
  
}

Data Generation

Code
# set.seed(4)
# x1 <- multi_mcmc(2000, 100, "simple")
Code
# mean(x1[[1]])
# sd(x1[[1]])
# var(x1[[1]])
# max(x1[[1]])
# min(x1[[1]])
# summary(x1[[1]])
Code
# mean(x1[[2]])
# sd(x1[[2]])
# var(x1[[2]])
# max(x1[[2]])
# min(x1[[2]])
# summary(x1[[2]])
# summary(x1[[3]])
Code
# set.seed(4)
# x2 <- multi_mcmc(2000, 100, "annealing", 0.8, 250)
Code
# mean(x2[[1]])
# sd(x2[[1]])
# var(x2[[1]])
# max(x2[[1]])
# min(x2[[1]])
# summary(x2[[1]])
Code
# mean(x2[[2]])
# sd(x2[[2]])
# var(x2[[2]])
# max(x2[[2]])
# min(x2[[2]])
# summary(x2[[2]])
# summary(x2[[3]])
Code
# set.seed(4)
# x3 <- multi_mcmc(2000, 100, "annealing dynamic", 0.08, 250)
Code
# mean(x3[[1]])
# sd(x3[[1]])
# var(x3[[1]])
# max(x3[[1]])
# min(x3[[1]])
# summary(x3[[1]])
Code
# mean(x3[[2]])
# sd(x3[[2]])
# var(x3[[2]])
# max(x3[[2]])
# min(x3[[2]])
# summary(x3[[2]])
# summary(x3[[3]])
Code
# set.seed(4)
# x4 <- multi_mcmc(2000, 100, "annealing dynamic", 0.2, 250)
Code
# mean(x4[[1]])
# sd(x4[[1]])
# var(x4[[1]])
# max(x4[[1]])
# min(x4[[1]])
# summary(x4[[1]])
Code
# mean(x4[[2]])
# sd(x4[[2]])
# var(x4[[2]])
# max(x4[[2]])
# min(x4[[2]])
# summary(x4[[2]])
Code
# set.seed(49)
# x5 <- multi_mcmc(2000, 100, "annealing", 0.8, 250)
Code
# mean(x5[[1]])
# sd(x5[[1]])
# var(x5[[1]])
# max(x5[[1]])
# min(x5[[1]])
# summary(x5[[1]])
Code
# mean(x5[[2]])
# sd(x5[[2]])
# var(x5[[2]])
# max(x5[[2]])
# min(x5[[2]])
# summary(x5[[2]])
# summary(x5[[3]])
Code
# set.seed(49)
# x6 <- multi_mcmc(2000, 100, "delayed", 0.2, 250)
Code
# mean(x6[[1]])
# sd(x6[[1]])
# var(x6[[1]])
# max(x6[[1]])
# min(x6[[1]])
# summary(x6[[1]])
Code
# mean(x6[[2]])
# sd(x6[[2]])
# var(x6[[2]])
# max(x6[[2]])
# min(x6[[2]])
# summary(x6[[2]])
# summary(x6[[3]])
Code
# set.seed(49)
# x7 <- multi_mcmc(1000, 1000, "annealing", 0.8, 250)
Code
# mean(x7[[1]])
# sd(x7[[1]])
# var(x7[[1]])
# max(x7[[1]])
# min(x7[[1]])
# summary(x7[[1]])
Code
# mean(x7[[2]])
# sd(x7[[2]])
# var(x7[[2]])
# max(x7[[2]])
# min(x7[[2]])
# summary(x7[[2]])
# summary(x7[[3]])
Code
test_grid1 <- matrix(c("x", "Bee", "Meadow", "x", "x", "x", "Meadow", "Meadow", "x", "x",
                      "Deer", "Bee", "Meadow", "x", "x", "x", "Bear", "Bee", "Deer",
                      "x"),4,5,byrow=TRUE)
test_grid1
     [,1]   [,2]     [,3]     [,4]   [,5]
[1,] "x"    "Bee"    "Meadow" "x"    "x" 
[2,] "x"    "Meadow" "Meadow" "x"    "x" 
[3,] "Deer" "Bee"    "Meadow" "x"    "x" 
[4,] "x"    "Bear"   "Bee"    "Deer" "x" 
Code
# set.seed(4)
# partial1 <- multi_mcmc(1000, 100, "annealing", 0.8, 200, test_grid1)
Code
# mean(partial1[[1]])
# sd(partial1[[1]])
# var(partial1[[1]])
# max(partial1[[1]])
# min(partial1[[1]])
# summary(partial1[[1]])
Code
# mean(partial1[[2]])
# sd(partial1[[2]])
# var(partial1[[2]])
# max(partial1[[2]])
# min(partial1[[2]])
# summary(partial1[[2]])
# summary(partial1[[3]])

Tuning

Code
tune <- function(iter, beta, bp, type, boardlist = NULL){
  beta_df <- c()
  bp_df <- c()
  iter_df <- c()
  start_score <- c()
  highest_score <- c()
  iter_at_highest <- c()
  highest_score_sd <- c()
  highest_iter_sd <- c()
  max_highest <- c()
  percentile_90 <- c()
  score_65_plus <- c()
  for(i in beta){
    for (j in bp){
      for(k in iter){
        if(is.null(boardlist) == FALSE){
          sim <- multi_mcmc(k, 100, type, i, j, boardlist = boardlist)
        }else{
          sim <- multi_mcmc(k, 100, type, i, j)
        }
        beta_df <- c(beta_df, i)
        bp_df <- c(bp_df, j)
        iter_df <- c(iter_df, k)
        start_score <- c(start_score, mean(sim[[1]]))
        highest_score <- c(highest_score, mean(sim[[2]]))
        iter_at_highest <- c(iter_at_highest, mean(sim[[3]]))
        highest_iter_sd <- c(highest_iter_sd, sd(sim[[3]]))
        highest_score_sd <- c(highest_score_sd, sd(sim[[2]]))
        max_highest <- c(max_highest, max(sim[[2]]))
        percentile_90 <- c(percentile_90, quantile(sim[[2]],probs=0.9))
        s <- sim[[2]]
        score_65_plus <- c(score_65_plus, length(s[s>=65]))
      }
      
    }
  }
  
  df <- data.frame(
    iterations = iter_df,
    beta = beta_df,
    break_point = bp_df,
    mean_start_score = start_score,
    mean_highest_score = highest_score,
    sd_highest_score = highest_score_sd,
    max_score = max_highest,
    score_90th_percent = percentile_90,
    score_65_plus = score_65_plus,
    average_iterations = iter_at_highest,
    sd_iterations = highest_iter_sd,
    type = type
  )
  
  return(df)
}
Code
beta <- c(0.3,0.8,0.9)
bp <- c(125, 250)
iter <- c(500, 750)

# set.seed(4)
# tune1 <- tune(iter, beta, bp, "delayed")
Code
# tune1
Code
generate_100 <- function(pool){
  board_list <- list()
  for(i in 1:100){
    board <- generate_grid(pool)
    board_list[[length(board_list)+1]] = board
  }
  return(board_list)
}
Code
# set.seed(88)
set.seed(89)
board100 <- generate_100(cards)

OLD scoring function

Code
# OLD <- multi_mcmc(1000, 100, "annealing dynamic", 0.9, 200, record_board = TRUE, boardlist = board100)

NEW scoring function

Code
# NEW <- multi_mcmc(1000, 100, "annealing dynamic", 0.9, 200, record_board = TRUE, boardlist = board100)
Code
# tune_exact1 <- tune(1000, 0.99, 250, "annealing dynamic", boardlist = board100)
# tune_exact2 <- tune(1000, 0.9, 200, "annealing dynamic", boardlist = board100)
# tune_exact3 <- tune(2000, 0.7, 500, "delayed", boardlist = board100)
# tune_exact4 <- tune(2000, 0.8, 500, "annealing dynamic", boardlist = board100)
# tune_exact5 <- tune(1500, 0.9, 500, "annealing dynamic", boardlist = board100)
# tune_exact6 <- tune(750, 0.99, 250, "annealing dynamic", boardlist = board100)
# tune_exact7 <- tune(750, 0.3, 250, "delayed", boardlist = board100)
# tune_exact8 <- tune(500, 0.99, 250, "annealing dynamic", boardlist = board100)
# tune_exact9 <- tune(2000, 0.9, 250, "annealing dynamic", boardlist = board100)
# tune_exact10 <- tune(750, 0.9, 125, "annealing dynamic", boardlist = board100)
# tune_exact11 <- tune(1500, 0.9, 250, "annealing dynamic", boardlist = board100)
Code
# final_params <- rbind(tune_exact1,
#                       tune_exact2,
#                       tune_exact3,
#                       tune_exact4,
#                       tune_exact5,
#                       tune_exact6,
#                       tune_exact7,
#                       tune_exact8,
#                       tune_exact9,
#                       tune_exact10,
#                       tune_exact11)
# 
# write.csv(final_params,here::here("final-parameters-CORRECTED-seed2.csv"), row.names = FALSE)
Code
# write.csv(tune1,here::here("new-parameters.csv"),row.names = FALSE)
Code
tuned_params1 <- read.csv(here::here("final-parameters-CORRECTED.csv"))
Code
tuned_params2 <- read.csv(here::here("final-parameters-CORRECTED-seed2.csv"))
Code
# new_params <- rbind(tuned_params1, tune1)
# write.csv(new_params,here::here("new-parameters.csv"), row.names = FALSE)
Code
# set.seed(45)
# sim_grid1 <- generate_grid(cards)
# x <- rw_mcmc(sim_grid1, 2000, "annealing dynamic", beta = 0.8, 250)
Code
# xz <- data.frame(iter = rep(1:length(x[[4]])), scores = x[[4]])
# ggplot(aes(x = iter, y = scores), data = xz) +
#   geom_line()

Database

Code
cards <- c(rep("Bear", 12), 
           rep("Bee", 8), 
           rep("Meadow", 20),
           rep("Trout", 10),
           rep("Eagle", 8),
           rep("Rabbit", 8),
           rep("Dragonfly", 8),
           rep("Fox", 12),
           rep("Deer", 12),
           rep("Stream", 20),
           rep("Wolf", 12)
           )

dfly_stream <- c(rep("Bear", 6), 
           rep("Bee", 4), 
           rep("Meadow", 10),
           rep("Trout", 5),
           rep("Eagle", 4),
           rep("Rabbit", 4),
           rep("Dragonfly", 8),
           rep("Fox", 6),
           rep("Deer", 6),
           rep("Stream", 20),
           rep("Wolf", 6)
           )

bee_meadow <- c(rep("Bear", 6), 
           rep("Bee", 8), 
           rep("Meadow", 20),
           rep("Trout", 5),
           rep("Eagle", 4),
           rep("Rabbit", 4),
           rep("Dragonfly", 4),
           rep("Fox", 6),
           rep("Deer", 6),
           rep("Stream", 10),
           rep("Wolf", 6)
           )

low_eag_rab <- c(rep("Bear", 12), 
           rep("Bee", 8), 
           rep("Meadow", 20),
           rep("Trout", 10),
           rep("Eagle", 2),
           rep("Rabbit", 2),
           rep("Dragonfly", 8),
           rep("Fox", 12),
           rep("Deer", 12),
           rep("Stream", 20),
           rep("Wolf", 12)
           )

Shuffle old database

Code
# database_INCORRECT <- read.csv(here::here("database.csv"))
# database_INCORRECT <- database_INCORRECT %>%
#   arrange(pool)
Code
# dist <- database_INCORRECT %>% 
#   group_by(pool) %>%
#   summarize(count = n())
# dist
Code
# ID_NEW <- database_INCORRECT %>% 
#   select(ID)
# 
# database_INCORRECT <- database_INCORRECT %>% 
#   select(2:21)
Code
shuffle <- function(df) {

  shuffled_df <- as.data.frame(t(apply(df, 1, sample)))
  
  colnames(shuffled_df) <- colnames(df)
  
  if (!is.null(rownames(df))) {
    rownames(shuffled_df) <- rownames(df)
  }
  
  return(shuffled_df)
}
Code
# set.seed(88)
# shuffled_incorrect <- shuffle(database_INCORRECT)
Code
# write.csv(shuffled_incorrect, here::here("shuffled_database.csv"), row.names = FALSE)
Code
# shuffled_incorrect <- read.csv(here::here("shuffled_database.csv"))

Convert to function ready

Code
df_to_matrix_list <- function(df) {
  if (ncol(df) != 20) {
    stop("Dataframe must have exactly 20 columns")
  }
  
  matrix_list <- lapply(1:nrow(df), function(i) {
    matrix(as.character(df[i, ]),  # Keep as character
           nrow = 4, 
           ncol = 5, 
           byrow = TRUE,
           dimnames = list(NULL, paste0("col", 1:5)))  # Optional column names
  })
  
  if (!is.null(rownames(df))) {
    names(matrix_list) <- rownames(df)
  }
  
  return(matrix_list)
}
Code
# shuffled_grid_list <- df_to_matrix_list(shuffled_incorrect)

First Time

Code
# startTime <- Sys.time()
# 
# db_gen <- multi_mcmc(1000, 100, "annealing dynamic", 0.9, 200, record_board = TRUE, 
#                      boardlist=shuffled_grid_list[names(shuffled_grid_list)[1:100]], 
#                      card_name = "bee_meadow")
# endTime <- Sys.time()
# print(endTime - startTime)
# 
# write.csv(db_gen, here::here("database-CORRECTED.csv"), row.names = FALSE)

Subsequent

Code
# startTime <- Sys.time()
# 
# db_gen <- multi_mcmc(1000, 4000, "annealing dynamic", 0.9, 200, record_board = TRUE,
#                      boardlist=shuffled_grid_list[names(shuffled_grid_list)[46001:50000]], 
#                      card_name = "low_eagle_rabbit")
# endTime <- Sys.time()
# print(endTime - startTime)
# 
# database_old <- read.csv(here::here("database-CORRECTED.csv"))
# database_new <- rbind(database_old, db_gen)
# database_new <- database_new %>% distinct()
# write.csv(database_new, here::here("database-CORRECTED.csv"), row.names = FALSE)
Code
# head(db_gen)
Code
database_new <- read.csv(here::here("database-CORRECTED.csv"))
Code
dist2 <- database_new %>%
  group_by(pool) %>%
  summarize(count = n())
dist2
# A tibble: 4 × 2
  pool             count
  <chr>            <int>
1 bee_meadow        6000
2 default          30000
3 dragonfly_stream  6000
4 low_eagle_rabbit  8000
Code
# Add the original IDs back

# database_new_ID <- database_new %>%
#   mutate(ID = ID_NEW$ID) %>%
#   select(ID, 1:43) %>%
#   arrange(ID)

# write.csv(database_new_ID, here::here("database-CORRECTED.csv"), row.names = FALSE)

Displays

Code
database_new %>%
  mutate(score = as.numeric(score)) %>%
  filter(pool == "default") %>%
  ggplot(aes(x = score)) +
  geom_histogram(binwidth = 3, fill = "steelblue", color = "black")

Code
database_new %>%
  mutate(score = as.numeric(score)) %>%
  filter(pool == "dragonfly_stream") %>%
  ggplot(aes(x = score)) +
  geom_histogram(binwidth = 3, fill = "steelblue", color = "black")

Code
database_new %>%
  mutate(score = as.numeric(score)) %>%
  filter(pool == "bee_meadow") %>%
  ggplot(aes(x = score)) +
  geom_histogram(binwidth = 3, fill = "steelblue", color = "black")

Code
database_new %>%
  mutate(score = as.numeric(score)) %>%
  filter(pool == "low_eagle_rabbit") %>%
  ggplot(aes(x = score)) +
  geom_histogram(binwidth = 3, fill = "steelblue", color = "black")

Clustering

Code
# database_new <- read.csv(here::here("database.csv"))
# 
# database_new_ID <- database_new %>%
#   mutate(ID = rep(1:50000)) %>%
#   select(ID, 1:22)
# 
# write.csv(database_new_ID, here::here("database.csv"), row.names = FALSE)
Code
# database <- read.csv(here::here("database-CORRECTED.csv"))
Code
# database_new <- database %>%
#   select(1:23)
# 
# write.csv(database_new, here::here("database-CORRECTED-a.csv"), row.names = FALSE)
Code
# database_start <- database %>%
#   select(1,25:44,24)
# 
# write.csv(database_start, here::here("database-CORRECTED-s.csv"), row.names = FALSE)
Code
database_new <- read.csv(here::here("database-CORRECTED-a.csv"))
Code
grids <- database_new %>% select(-c(ID, pool))

dmy <- dummyVars(" ~ .", data = grids)

grids <- data.frame(predict(dmy, newdata = grids))

grids_matrix <- as.matrix(grids)
Code
set.seed(4)
km_spec1 <- k_means(num_clusters = 4)

grids_recipe <- recipe(~., data = grids_matrix)

km_wflow1 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec1)

km_fitted1 <- km_wflow1 |> fit(grids_matrix)

km_fitted1 |> extract_centroids()
# A tibble: 4 × 222
  .cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
  <fct>           <dbl>       <dbl>        <dbl>             <dbl>         <dbl>
1 Cluster…       0.0482      0.0403       0.137             0.0559        0.0353
2 Cluster…       0.0647      0.0284       0.125             0.0532        0.0325
3 Cluster…       0.0994      0.0198       0.111             0.0488        0.0344
4 Cluster…       0.158       0.0131       0.0968            0.0442        0.0539
# ℹ 216 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
#   row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
#   row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
#   row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
#   row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
#   row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
#   row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
Code
grids_km1 <- kmeans(grids_matrix, centers = 4)

grids_km1$totss
[1] 4363055
Code
grids_km1$withinss
[1] 318486.3 196194.0 458591.5 371015.6
Code
grids_km1$betweenss
[1] 3018768
Code
x <- km_fitted1 |> extract_centroids()
x
# A tibble: 4 × 222
  .cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
  <fct>           <dbl>       <dbl>        <dbl>             <dbl>         <dbl>
1 Cluster…       0.0482      0.0403       0.137             0.0559        0.0353
2 Cluster…       0.0647      0.0284       0.125             0.0532        0.0325
3 Cluster…       0.0994      0.0198       0.111             0.0488        0.0344
4 Cluster…       0.158       0.0131       0.0968            0.0442        0.0539
# ℹ 216 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
#   row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
#   row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
#   row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
#   row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
#   row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
#   row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
Code
pc <- prcomp(grids_matrix)
Code
cumul_vars <- cumsum(pc$sdev^2)/sum(pc$sdev^2)
cumul_vars
  [1] 0.7990713 0.8085696 0.8151198 0.8208002 0.8246353 0.8282929 0.8318508
  [8] 0.8350357 0.8379635 0.8407963 0.8435440 0.8462028 0.8487573 0.8512485
 [15] 0.8537064 0.8560526 0.8583538 0.8605829 0.8627621 0.8649096 0.8669842
 [22] 0.8690298 0.8710401 0.8730306 0.8749617 0.8768473 0.8787150 0.8805724
 [29] 0.8822680 0.8839363 0.8855721 0.8871926 0.8887514 0.8902934 0.8917863
 [36] 0.8932713 0.8947479 0.8961980 0.8975581 0.8989054 0.9002266 0.9015448
 [43] 0.9028343 0.9041140 0.9053820 0.9066372 0.9078713 0.9090997 0.9103152
 [50] 0.9115020 0.9126733 0.9138378 0.9149813 0.9161163 0.9172431 0.9183652
 [57] 0.9194728 0.9205711 0.9216547 0.9227280 0.9237937 0.9248461 0.9258916
 [64] 0.9269296 0.9279460 0.9289554 0.9299574 0.9309509 0.9319060 0.9328501
 [71] 0.9337880 0.9347152 0.9356375 0.9365395 0.9374277 0.9383124 0.9391930
 [78] 0.9400593 0.9409234 0.9417758 0.9426169 0.9434524 0.9442858 0.9451073
 [85] 0.9459235 0.9467280 0.9475270 0.9483193 0.9491100 0.9498936 0.9506737
 [92] 0.9514378 0.9521992 0.9529520 0.9536946 0.9544332 0.9551676 0.9558978
 [99] 0.9566178 0.9573257 0.9580284 0.9587258 0.9594118 0.9600901 0.9607608
[106] 0.9614246 0.9620849 0.9627360 0.9633820 0.9640142 0.9646441 0.9652636
[113] 0.9658721 0.9664730 0.9670685 0.9676560 0.9682394 0.9688168 0.9693920
[120] 0.9699625 0.9705296 0.9710928 0.9716514 0.9722043 0.9727534 0.9733000
[127] 0.9738434 0.9743822 0.9749177 0.9754473 0.9759608 0.9764647 0.9769677
[134] 0.9774678 0.9779626 0.9784514 0.9789357 0.9794191 0.9798949 0.9803567
[141] 0.9808163 0.9812710 0.9817239 0.9821709 0.9826135 0.9830502 0.9834828
[148] 0.9839049 0.9843214 0.9847349 0.9851440 0.9855439 0.9859416 0.9863376
[155] 0.9867263 0.9871117 0.9874910 0.9878659 0.9882330 0.9885971 0.9889546
[162] 0.9893095 0.9896598 0.9900045 0.9903462 0.9906723 0.9909957 0.9913162
[169] 0.9916327 0.9919474 0.9922569 0.9925661 0.9928735 0.9931770 0.9934799
[176] 0.9937808 0.9940796 0.9943759 0.9946708 0.9949610 0.9952431 0.9955192
[183] 0.9957940 0.9960661 0.9963357 0.9966025 0.9968679 0.9971311 0.9973906
[190] 0.9976455 0.9978960 0.9981376 0.9983756 0.9986124 0.9988470 0.9990717
[197] 0.9992804 0.9994651 0.9996469 0.9998269 1.0000000 1.0000000 1.0000000
[204] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
[211] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
[218] 1.0000000 1.0000000 1.0000000 1.0000000
Code
grids_reduced <- pc$x[, 1:8]

grids_pca_km <- kmeans(grids_reduced, 4)

grids_pca_km$totss
[1] 3643307
Code
grids_pca_km$withinss
[1] 186732.3 113977.2 127585.7 174209.0
Code
grids_pca_km$betweenss
[1] 3040803
Code
pc$rotation[,1:2]
                            PC1           PC2
row1col1Bear       3.525471e-03  1.208440e-02
row1col1Bee       -1.229508e-03  2.847542e-02
row1col1Deer      -1.563141e-03  6.438670e-03
row1col1Dragonfly -3.087862e-04 -4.493300e-02
row1col1Eagle      2.816748e-04 -1.600740e-02
row1col1Fox        1.203340e-04 -4.565968e-03
row1col1Meadow    -1.743163e-03  1.175701e-01
row1col1Rabbit    -7.911279e-04 -8.263966e-03
row1col1Stream    -5.985244e-04 -9.325707e-02
row1col1Trout     -1.094325e-03 -1.417474e-02
row1col1Wolf       3.401097e-03  1.663354e-02
row1col2Bear       2.230156e-03  8.241668e-03
row1col2Bee       -1.749250e-03  4.157861e-02
row1col2Deer      -4.099825e-04  1.430501e-02
row1col2Dragonfly  7.485636e-05 -3.830137e-02
row1col2Eagle     -6.510955e-04 -2.376168e-02
row1col2Fox        6.797353e-04  4.522287e-03
row1col2Meadow    -1.692704e-03  1.273517e-01
row1col2Rabbit    -4.789587e-04  6.355118e-03
row1col2Stream     1.589351e-03 -1.176590e-01
row1col2Trout     -2.403127e-03 -3.218066e-02
row1col2Wolf       2.811020e-03  9.548389e-03
row1col3Bear       1.692085e-03 -1.994705e-04
row1col3Bee       -1.895569e-03  1.638868e-03
row1col3Deer       3.658011e-04  2.325672e-03
row1col3Dragonfly -9.823765e-06 -2.775846e-03
row1col3Eagle     -1.099400e-03 -4.714311e-04
row1col3Fox        9.373601e-04 -3.936103e-04
row1col3Meadow    -1.803892e-03  3.662032e-03
row1col3Rabbit    -6.802968e-04  1.281781e-03
row1col3Stream     2.526429e-03 -5.500242e-03
row1col3Trout     -2.856538e-03 -8.311542e-04
row1col3Wolf       2.823844e-03  1.263401e-03
row1col4Bear       2.365416e-03 -5.615433e-03
row1col4Bee       -1.697269e-03 -3.939167e-02
row1col4Deer      -5.800104e-04 -1.056053e-02
row1col4Dragonfly  5.442077e-05  3.708769e-02
row1col4Eagle     -6.695388e-04  2.251931e-02
row1col4Fox        9.502406e-04 -3.137602e-03
row1col4Meadow    -2.057133e-03 -1.243571e-01
row1col4Rabbit    -4.448784e-04 -3.930111e-03
row1col4Stream     1.585734e-03  1.103649e-01
row1col4Trout     -2.303197e-03  3.042570e-02
row1col4Wolf       2.796215e-03 -1.340519e-02
row1col5Bear       3.306183e-03 -1.082415e-02
row1col5Bee       -1.291068e-03 -2.647484e-02
row1col5Deer      -1.652200e-03 -8.678304e-03
row1col5Dragonfly -2.482084e-04  4.608562e-02
row1col5Eagle      1.513419e-04  1.596037e-02
row1col5Fox        2.521500e-04  6.025699e-04
row1col5Meadow    -1.891161e-03 -1.149612e-01
row1col5Rabbit    -8.049679e-04  8.140825e-03
row1col5Stream    -3.730683e-04  8.427917e-02
row1col5Trout     -9.794445e-04  1.492748e-02
row1col5Wolf       3.530443e-03 -9.057571e-03
row2col1Bear       2.706502e-03  7.159172e-03
row2col1Bee       -1.838939e-03  6.279956e-02
row2col1Deer       2.658413e-04  8.714832e-03
row2col1Dragonfly  4.202294e-06 -5.758599e-02
row2col1Eagle     -7.078046e-04 -2.305346e-02
row2col1Fox        8.849161e-04  1.046862e-04
row2col1Meadow    -3.225206e-03  2.295829e-01
row2col1Rabbit    -1.196575e-04 -4.273469e-03
row2col1Stream     9.758095e-04 -1.783810e-01
row2col1Trout     -2.431067e-03 -5.204120e-02
row2col1Wolf       3.485403e-03  6.973955e-03
row2col2Bear       6.318311e-04  9.754261e-03
row2col2Bee       -3.254568e-03  1.064106e-01
row2col2Deer       6.514257e-04  6.540492e-03
row2col2Dragonfly  3.439755e-04 -3.858544e-02
row2col2Eagle     -1.309748e-03 -4.043001e-03
row2col2Fox        9.031619e-04  4.278362e-03
row2col2Meadow    -1.831383e-03  2.016166e-01
row2col2Rabbit     3.424934e-04  1.757391e-03
row2col2Stream     5.653596e-03 -1.870163e-01
row2col2Trout     -3.825911e-03 -1.058061e-01
row2col2Wolf       1.695126e-03  5.093176e-03
row2col3Bear      -1.357112e-03  3.820824e-03
row2col3Bee       -3.407352e-03 -3.877209e-05
row2col3Deer       8.141742e-04 -6.092493e-04
row2col3Dragonfly  8.229033e-04 -6.379700e-04
row2col3Eagle     -1.935517e-03  5.162879e-04
row2col3Fox        8.657173e-04  4.248977e-04
row2col3Meadow    -1.361598e-03  2.622730e-03
row2col3Rabbit     4.002840e-04  8.882283e-04
row2col3Stream     7.389636e-03 -2.156863e-03
row2col3Trout     -3.840094e-03 -4.711572e-03
row2col3Wolf       1.608960e-03 -1.185411e-04
row2col4Bear       3.132284e-04 -1.220364e-02
row2col4Bee       -3.446916e-03 -1.007165e-01
row2col4Deer       8.519385e-04 -6.969672e-03
row2col4Dragonfly  3.758565e-04  3.566678e-02
row2col4Eagle     -1.356372e-03  5.270959e-03
row2col4Fox        1.023162e-03 -4.198022e-03
row2col4Meadow    -1.841906e-03 -1.999834e-01
row2col4Rabbit     4.245672e-04 -1.213776e-03
row2col4Stream     5.593015e-03  1.811398e-01
row2col4Trout     -3.819017e-03  1.079895e-01
row2col4Wolf       1.882444e-03 -4.782140e-03
row2col5Bear       2.621287e-03 -6.284689e-03
row2col5Bee       -1.882357e-03 -6.418720e-02
row2col5Deer       4.777415e-04 -8.721750e-03
row2col5Dragonfly -2.887361e-04  5.823075e-02
row2col5Eagle     -4.074980e-04  2.091108e-02
row2col5Fox        1.017649e-03 -4.306659e-03
row2col5Meadow    -3.389228e-03 -2.198540e-01
row2col5Rabbit    -1.236417e-04  4.315750e-03
row2col5Stream     1.049571e-03  1.751737e-01
row2col5Trout     -2.453641e-03  5.029976e-02
row2col5Wolf       3.378854e-03 -5.576761e-03
row3col1Bear       3.021285e-03  6.968097e-03
row3col1Bee       -1.844641e-03  6.236495e-02
row3col1Deer      -6.548834e-05  4.560889e-03
row3col1Dragonfly -1.132292e-04 -5.763261e-02
row3col1Eagle     -4.896807e-04 -2.211167e-02
row3col1Fox        1.135382e-03  1.901011e-03
row3col1Meadow    -3.336063e-03  2.266424e-01
row3col1Rabbit    -2.577588e-04 -3.049764e-03
row3col1Stream     1.147342e-03 -1.775896e-01
row3col1Trout     -2.451324e-03 -4.997501e-02
row3col1Wolf       3.254177e-03  7.921327e-03
row3col2Bear       5.290658e-04  9.130014e-03
row3col2Bee       -3.590559e-03  1.046187e-01
row3col2Deer       8.097848e-04  6.023265e-03
row3col2Dragonfly  4.689507e-04 -3.495873e-02
row3col2Eagle     -1.373730e-03 -4.809535e-03
row3col2Fox        8.928715e-04  4.596970e-03
row3col2Meadow    -1.779388e-03  1.977747e-01
row3col2Rabbit     3.249929e-04  1.881293e-03
row3col2Stream     5.416676e-03 -1.820271e-01
row3col2Trout     -3.600518e-03 -1.079460e-01
row3col2Wolf       1.901853e-03  5.716463e-03
row3col3Bear      -1.323309e-03 -3.366134e-03
row3col3Bee       -3.391096e-03  5.036990e-04
row3col3Deer       8.861450e-04  5.779024e-04
row3col3Dragonfly  6.810407e-04 -9.540905e-04
row3col3Eagle     -1.882615e-03  8.591630e-04
row3col3Fox        8.590569e-04 -9.611547e-04
row3col3Meadow    -1.433720e-03 -2.669119e-03
row3col3Rabbit     2.900880e-04  1.265972e-04
row3col3Stream     7.324821e-03  4.413155e-03
row3col3Trout     -3.725313e-03  1.426930e-03
row3col3Wolf       1.714900e-03  4.305197e-05
row3col4Bear       7.439585e-04 -8.508973e-03
row3col4Bee       -3.702024e-03 -1.047104e-01
row3col4Deer       8.095716e-04 -7.273598e-03
row3col4Dragonfly  1.990299e-04  3.849704e-02
row3col4Eagle     -9.602989e-04  4.975071e-03
row3col4Fox        8.586065e-04 -4.026960e-03
row3col4Meadow    -2.095723e-03 -2.018740e-01
row3col4Rabbit     4.226133e-04 -2.878118e-03
row3col4Stream     5.606404e-03  1.869644e-01
row3col4Trout     -3.727345e-03  1.050996e-01
row3col4Wolf       1.845206e-03 -6.264071e-03
row3col5Bear       2.507376e-03 -8.044082e-03
row3col5Bee       -1.792411e-03 -6.145913e-02
row3col5Deer      -2.027287e-05 -7.788495e-03
row3col5Dragonfly  1.770525e-06  5.697001e-02
row3col5Eagle     -3.208647e-04  2.078623e-02
row3col5Fox        1.300581e-03 -1.069027e-03
row3col5Meadow    -3.776937e-03 -2.268463e-01
row3col5Rabbit     7.485456e-06  2.727875e-03
row3col5Stream     1.143120e-03  1.790783e-01
row3col5Trout     -2.488087e-03  5.351694e-02
row3col5Wolf       3.438240e-03 -7.872314e-03
row4col1Bear       3.299139e-03  1.113669e-02
row4col1Bee       -1.304996e-03  2.835685e-02
row4col1Deer      -1.478687e-03  9.310936e-03
row4col1Dragonfly -2.261399e-04 -4.420473e-02
row4col1Eagle      3.721683e-04 -1.546505e-02
row4col1Fox       -1.073184e-04 -4.167611e-03
row4col1Meadow    -1.800250e-03  1.142681e-01
row4col1Rabbit    -4.715558e-04 -1.117283e-02
row4col1Stream    -2.436098e-04 -8.676783e-02
row4col1Trout     -1.028451e-03 -1.397925e-02
row4col1Wolf       2.989700e-03  1.268477e-02
row4col2Bear       2.687398e-03  8.091017e-03
row4col2Bee       -1.667118e-03  3.896600e-02
row4col2Deer      -4.883495e-04  9.704055e-03
row4col2Dragonfly  1.255462e-04 -3.683669e-02
row4col2Eagle     -7.586840e-04 -2.241905e-02
row4col2Fox        4.013194e-04  3.350299e-03
row4col2Meadow    -2.023243e-03  1.264752e-01
row4col2Rabbit    -5.476238e-04  4.390553e-03
row4col2Stream     1.591733e-03 -1.091139e-01
row4col2Trout     -2.332282e-03 -2.868451e-02
row4col2Wolf       3.011304e-03  6.076988e-03
row4col3Bear       1.860172e-03 -3.426441e-03
row4col3Bee       -1.874538e-03 -1.026529e-03
row4col3Deer       3.319346e-04 -5.533426e-04
row4col3Dragonfly -6.885821e-05  2.031192e-03
row4col3Eagle     -7.901326e-04 -4.173972e-04
row4col3Fox        8.595626e-04  1.367720e-03
row4col3Meadow    -1.962354e-03 -1.896016e-03
row4col3Rabbit    -7.221047e-04 -2.335551e-04
row4col3Stream     2.521984e-03  5.623811e-03
row4col3Trout     -2.702473e-03  1.131848e-03
row4col3Wolf       2.546807e-03 -2.601291e-03
row4col4Bear       2.351488e-03 -7.469977e-03
row4col4Bee       -1.783549e-03 -4.193343e-02
row4col4Deer      -2.845689e-04 -1.179528e-02
row4col4Dragonfly  2.056544e-05  3.858043e-02
row4col4Eagle     -5.826719e-04  2.134230e-02
row4col4Fox        7.637383e-04 -6.255920e-03
row4col4Meadow    -2.166699e-03 -1.303242e-01
row4col4Rabbit    -5.412153e-04 -4.560138e-03
row4col4Stream     1.611892e-03  1.154778e-01
row4col4Trout     -2.427926e-03  3.594741e-02
row4col4Wolf       3.038946e-03 -9.008968e-03
row4col5Bear       3.396172e-03 -1.255136e-02
row4col5Bee       -1.389434e-03 -2.830883e-02
row4col5Deer      -1.828637e-03 -8.506739e-03
row4col5Dragonfly -2.623748e-04  4.728809e-02
row4col5Eagle      2.895924e-04  1.659036e-02
row4col5Fox       -3.302766e-05  2.067047e-03
row4col5Meadow    -1.872072e-03 -1.179045e-01
row4col5Rabbit    -2.639154e-04  1.093532e-02
row4col5Stream    -3.977629e-04  8.842771e-02
row4col5Trout     -1.018393e-03  1.319532e-02
row4col5Wolf       3.379853e-03 -1.123246e-02
score             -9.995262e-01  3.171744e-04

With score removed

Code
# No lowest
grids <- database_new %>%
  filter(score > 57) %>%
  select(-c(ID, pool))

dmy <- dummyVars(" ~ .", data = grids)

grids <- data.frame(predict(dmy, newdata = grids))
Code
grids_noscore <- grids %>%
  select(-c(score))

noscore_matrix <- as.matrix(grids_noscore)
Code
# write.csv(grids_noscore, here::here("grids_noscore-CORRECTED.csv"), row.names = FALSE)
Code
set.seed(4)
km_spec2 <- k_means(num_clusters = 3)
grids_recipe <- recipe(~., data = noscore_matrix)

km_wflow2 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec2)

km_fitted2 <- km_wflow2 |> fit(noscore_matrix)

km_fitted2 |> extract_centroids()
# A tibble: 3 × 221
  .cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
  <fct>           <dbl>       <dbl>        <dbl>             <dbl>         <dbl>
1 Cluster…       0.0494     0.00369       0.127            0.0927        0.0518 
2 Cluster…       0.0605     0.102         0.0848           0.00277       0.00682
3 Cluster…       0.0656     0.0153        0.184            0.0437        0.0314 
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
#   row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
#   row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
#   row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
#   row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
#   row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
#   row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
Code
grids_km2 <- kmeans(noscore_matrix, centers = 3)

grids_km2$totss
[1] 646241.3
Code
grids_km2$withinss
[1] 164572.4 283239.0 162041.1
Code
grids_km2$betweenss
[1] 36388.78
Code
set.seed(4)
km_spec2 <- k_means(num_clusters = 4)
grids_recipe <- recipe(~., data = noscore_matrix)

km_wflow2 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec2)

km_fitted2 <- km_wflow2 |> fit(noscore_matrix)

km_fitted2 |> extract_centroids()
# A tibble: 4 × 221
  .cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
  <fct>           <dbl>       <dbl>        <dbl>             <dbl>         <dbl>
1 Cluster…       0.0450     0.00143       0.0818            0.122         0.0624
2 Cluster…       0.0905     0.0450        0.124             0.0183        0.0102
3 Cluster…       0.0368     0.0818        0.142             0.0152        0.0199
4 Cluster…       0.0573     0.0129        0.179             0.0539        0.0389
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
#   row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
#   row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
#   row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
#   row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
#   row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
#   row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
Code
grids_km2 <- kmeans(noscore_matrix, centers = 4)

grids_km2$totss
[1] 646241.3
Code
grids_km2$withinss
[1] 145872.1 144632.4 153272.9 154573.8
Code
grids_km2$betweenss
[1] 47890.06
Code
set.seed(4)
km_spec2 <- k_means(num_clusters = 5)
grids_recipe <- recipe(~., data = noscore_matrix)

km_wflow2 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec2)

km_fitted2 <- km_wflow2 |> fit(noscore_matrix)

km_fitted2 |> extract_centroids()
# A tibble: 5 × 221
  .cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
  <fct>           <dbl>       <dbl>        <dbl>             <dbl>         <dbl>
1 Cluster…       0.0452     0.00187       0.0774           0.121         0.0643 
2 Cluster…       0.0915     0.0670        0.0873           0.00583       0.00636
3 Cluster…       0.0279     0.126         0.101            0.00438       0.00666
4 Cluster…       0.0687     0.00912       0.192            0.0451        0.0307 
5 Cluster…       0.0556     0.00726       0.183            0.0574        0.0403 
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
#   row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
#   row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
#   row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
#   row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
#   row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
#   row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
Code
grids_km2 <- kmeans(noscore_matrix, centers = 5)

grids_km2$totss
[1] 646241.3
Code
grids_km2$withinss
[1]  87063.98 140905.89 123631.30 143523.59  95218.65
Code
grids_km2$betweenss
[1] 55897.84
Code
set.seed(4)
km_spec2 <- k_means(num_clusters = 6)
grids_recipe <- recipe(~., data = noscore_matrix)

km_wflow2 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec2)

km_fitted2 <- km_wflow2 |> fit(noscore_matrix)

km_fitted2 |> extract_centroids()
# A tibble: 6 × 221
  .cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
  <fct>           <dbl>       <dbl>        <dbl>             <dbl>         <dbl>
1 Cluster…       0.0348     0.00278       0.0728           0.112         0.0869 
2 Cluster…       0.0960     0.0721        0.0857           0.00465       0.00733
3 Cluster…       0.0235     0.124         0.111            0.00491       0.00655
4 Cluster…       0.0731     0.00710       0.189            0.0462        0.0281 
5 Cluster…       0.0757     0.00208       0.127            0.100         0.0121 
6 Cluster…       0.0358     0.0152        0.201            0.0424        0.0541 
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
#   row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
#   row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
#   row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
#   row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
#   row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
#   row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
Code
grids_km2 <- kmeans(noscore_matrix, centers = 6)

grids_km2$totss
[1] 646241.3
Code
grids_km2$withinss
[1] 107160.59  84558.43 102995.44  85750.57 104087.50 105745.73
Code
grids_km2$betweenss
[1] 55942.98
Code
set.seed(4)
km_spec2 <- k_means(num_clusters = 7)
grids_recipe <- recipe(~., data = noscore_matrix)

km_wflow2 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec2)

km_fitted2 <- km_wflow2 |> fit(noscore_matrix)

km_fitted2 |> extract_centroids()
# A tibble: 7 × 221
  .cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
  <fct>           <dbl>       <dbl>        <dbl>             <dbl>         <dbl>
1 Cluster…       0.0183     0.00381       0.0782           0.0816        0.110  
2 Cluster…       0.0954     0.0714        0.0853           0.00475       0.00694
3 Cluster…       0.0234     0.129         0.106            0.00449       0.00636
4 Cluster…       0.0737     0.00746       0.192            0.0450        0.0281 
5 Cluster…       0.0666     0.00120       0.0899           0.153         0.0185 
6 Cluster…       0.0796     0.00485       0.152            0.0741        0.0221 
7 Cluster…       0.0333     0.0149        0.200            0.0434        0.0469 
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
#   row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
#   row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
#   row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
#   row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
#   row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
#   row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
Code
grids_km2 <- kmeans(noscore_matrix, centers = 7)

grids_km2$totss
[1] 646241.3
Code
grids_km2$withinss
[1]  65988.46  78541.83  88200.81  81343.84  84341.52  65373.08 114440.56
Code
grids_km2$betweenss
[1] 68011.17
Code
set.seed(4)
km_spec2 <- k_means(num_clusters = 8)
grids_recipe <- recipe(~., data = noscore_matrix)

km_wflow2 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec2)

km_fitted2 <- km_wflow2 |> fit(noscore_matrix)

km_fitted2 |> extract_centroids()
# A tibble: 8 × 221
  .cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
  <fct>           <dbl>       <dbl>        <dbl>             <dbl>         <dbl>
1 Cluster…       0.0188     0.00332       0.0760           0.0846        0.110  
2 Cluster…       0.0983     0.0730        0.0840           0.00492       0.00846
3 Cluster…       0.0152     0.150         0.0706           0.00199       0.00398
4 Cluster…       0.101      0.00809       0.165            0.0468        0.0134 
5 Cluster…       0.0670     0.00121       0.0910           0.152         0.0192 
6 Cluster…       0.0798     0.00406       0.152            0.0754        0.0218 
7 Cluster…       0.0395     0.0297        0.209            0.0285        0.0330 
8 Cluster…       0.0340     0.0113        0.201            0.0446        0.0491 
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
#   row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
#   row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
#   row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
#   row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
#   row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
#   row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
Code
grids_km2 <- kmeans(noscore_matrix, centers = 8)

grids_km2$totss
[1] 646241.3
Code
grids_km2$withinss
[1]  54845.88  75562.18  76206.71 107942.76  56648.50  77665.17  64363.53
[8]  61319.18
Code
grids_km2$betweenss
[1] 71687.35
Code
km_fitted2 |> extract_centroids()
# A tibble: 8 × 221
  .cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
  <fct>           <dbl>       <dbl>        <dbl>             <dbl>         <dbl>
1 Cluster…       0.0188     0.00332       0.0760           0.0846        0.110  
2 Cluster…       0.0983     0.0730        0.0840           0.00492       0.00846
3 Cluster…       0.0152     0.150         0.0706           0.00199       0.00398
4 Cluster…       0.101      0.00809       0.165            0.0468        0.0134 
5 Cluster…       0.0670     0.00121       0.0910           0.152         0.0192 
6 Cluster…       0.0798     0.00406       0.152            0.0754        0.0218 
7 Cluster…       0.0395     0.0297        0.209            0.0285        0.0330 
8 Cluster…       0.0340     0.0113        0.201            0.0446        0.0491 
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
#   row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
#   row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
#   row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
#   row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
#   row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
#   row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
Code
km_fitted2 %>% extract_cluster_assignment()
# A tibble: 36,708 × 1
   .cluster 
   <fct>    
 1 Cluster_1
 2 Cluster_2
 3 Cluster_3
 4 Cluster_3
 5 Cluster_1
 6 Cluster_4
 7 Cluster_5
 8 Cluster_4
 9 Cluster_5
10 Cluster_3
# ℹ 36,698 more rows
Code
database_new %>%
  summarize(mean_score=mean(score),
            sd_score=sd(score),
            min = min(score),
            p25th=quantile(score,probs=0.25),
            median=quantile(score,probs=0.5),
            p75th=quantile(score,probs=0.75),
            max=max(score),
            count=n())
  mean_score sd_score min p25th median p75th max count
1   61.67342 8.346453  17    57     63    67  88 50000
Code
database_lowest_removed <- database_new %>%
  filter(score > 57)
Code
database1 <- database_lowest_removed %>%
  mutate(`7cluster` = extract_cluster_assignment(km_fitted2)$.cluster)
Code
summarized <- database1 %>%
  group_by(`7cluster`) %>%
  summarise(mean_score=mean(score),
            sd_score=sd(score),
            .groups = 'drop') 
summarized
# A tibble: 8 × 3
  `7cluster` mean_score sd_score
  <fct>           <dbl>    <dbl>
1 Cluster_1        65.5     4.96
2 Cluster_2        65.6     4.97
3 Cluster_3        65.6     5.00
4 Cluster_4        65.4     4.83
5 Cluster_5        65.6     4.90
6 Cluster_6        65.6     4.99
7 Cluster_7        65.6     4.99
8 Cluster_8        65.5     4.92
Code
# write.csv(summarized, here::here("score_removed_c_summary.csv"), row.names = FALSE)
Code
# database1 %>%
#   group_by(cluster) %>%
#   summarise(mean_score=mean(score),
#             sd_score=sd(score),
#             count=n(),
#             .groups = 'drop') 
Code
# database1 %>%
#   group_by(cluster) %>%
#   summarise(mean_score=mean(score),
#             sd_score=sd(score),
#             count=n(),
#             .groups = 'drop') 

With individual scores and lowest removed

Code
summary(database_new$score)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  17.00   57.00   63.00   61.67   67.00   88.00 
Code
database_new3 <- database_new %>%
  filter(score > 57) %>%
  select(-c(ID,pool,score))

ID_cols <- database_new %>%
  filter(score > 57) %>%
  select(ID)
Code
# Run once

# first_row <- score_grid(matrix(c(t(database_new3[1,])),nrow=4,ncol=5,byrow=T), individual=TRUE)
# 
# db_individual <- data.frame(
#   bear_score = c(first_row[1]),
#   bee_score = c(first_row[2]),
#   meadow_score = c(first_row[3]),
#   trout_score = c(first_row[4]),
#   eagle_score = c(first_row[5]),
#   rabbit_score = c(first_row[6]),
#   dragonfly_score = c(first_row[7]),
#   fox_score = c(first_row[8]),
#   deer_score = c(first_row[9]),
#   stream_score = c(first_row[10]),
#   wolves_score = c(first_row[11]),
#   dv_score = c(first_row[12])
# )
# 
# for(i in 2:nrow(database_new3)){
#   row <- as.list(score_grid(matrix(c(t(database_new3[i,])),nrow=4,ncol=5,byrow=T), individual=TRUE))
#   db_individual <- rbind(db_individual, row)
# }
Code
# db_individual_ID <- db_individual %>%
#   mutate(ID = ID_cols$ID) %>%
#   select(ID, 1:12)
# 
# write.csv(db_individual_ID, here::here("db_individual_lowest_removed-CORRECTED.csv"), row.names = FALSE)
Code
db_individual_removed <- read.csv(here::here("db_individual_lowest_removed-CORRECTED.csv"))

db_individual_removed <- db_individual_removed %>%
  select(-ID)

individual_matrix <- as.matrix(db_individual_removed)
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 3)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 3 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1       6.47      5.48         5.08        6.39        2.85
2 Cluster_2       5.13     13.4         13.4         4.49        3.27
3 Cluster_3       6.07      4.51         5.51        7.59       12.5 
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 3)

grids_km3$totss
[1] 7106704
Code
grids_km3$withinss
[1] 1444484 2254308 1430584
Code
grids_km3$betweenss
[1] 1977328
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 4)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 4 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1       8.51      6.75         5.27        5.96        3.30
2 Cluster_2       3.77      4.09         4.98        7.15        3.41
3 Cluster_3       4.70     13.6         13.9         4.47        3.29
4 Cluster_4       5.37      4.22         5.75        7.69       13.3 
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 4)

grids_km3$totss
[1] 7106704
Code
grids_km3$withinss
[1] 1186674.7 1251643.8 1452332.0  760011.4
Code
grids_km3$betweenss
[1] 2456042
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 5)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 5 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1       4.39      4.82         7.46        5.75        3.84
2 Cluster_2       3.91      4.23         4.45        7.25        3.47
3 Cluster_3       4.59     13.8         14.0         4.45        3.32
4 Cluster_4       5.34      4.35         5.26        7.76       14.2 
5 Cluster_5      11.4       8.04         4.37        6.47        3.82
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 5)

grids_km3$totss
[1] 7106704
Code
grids_km3$withinss
[1]  736378.0  871435.4 1106223.0  659987.2 1002434.4
Code
grids_km3$betweenss
[1] 2730246
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 6)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 6 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1       4.85      5.49         4.96        5.97        3.95
2 Cluster_2       3.87      4.16         4.33        7.28        3.45
3 Cluster_3       4.97     18.7         13.0         3.98        3.00
4 Cluster_4       4.34      8.36        13.8         5.11        3.90
5 Cluster_5       5.33      4.36         5.05        7.78       14.6 
6 Cluster_6      12.0       7.59         4.65        6.53        3.86
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 6)

grids_km3$totss
[1] 7106704
Code
grids_km3$withinss
[1] 825052.1 663096.3 580316.3 855767.0 693384.0 510255.8
Code
grids_km3$betweenss
[1] 2978833
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 7)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 7 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1       5.05      5.66         5.11        5.07        3.68
2 Cluster_2       3.88      4.20         4.30        7.04        3.36
3 Cluster_3       4.91     18.8         13.0         3.94        3.00
4 Cluster_4       4.37      8.49        13.9         4.83        3.71
5 Cluster_5       5.53      4.49         5.24        6.01       16.0 
6 Cluster_6      12.5       7.69         4.72        5.84        3.79
7 Cluster_7       4.85      4.84         5.18       12.3         7.68
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 7)

grids_km3$totss
[1] 7106704
Code
grids_km3$withinss
[1] 633270.6 555515.0 616836.7 645917.9 472100.9 571064.9 485252.6
Code
grids_km3$betweenss
[1] 3126746
Code
centroids <- km_fitted3 |> extract_centroids()
centroids
# A tibble: 7 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1       5.05      5.66         5.11        5.07        3.68
2 Cluster_2       3.88      4.20         4.30        7.04        3.36
3 Cluster_3       4.91     18.8         13.0         3.94        3.00
4 Cluster_4       4.37      8.49        13.9         4.83        3.71
5 Cluster_5       5.53      4.49         5.24        6.01       16.0 
6 Cluster_6      12.5       7.69         4.72        5.84        3.79
7 Cluster_7       4.85      4.84         5.18       12.3         7.68
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
database_lowest_removed <- database_new %>%
  filter(score > 57)
Code
database3 <- database_lowest_removed %>%
  mutate(cluster = extract_cluster_assignment(km_fitted3)$.cluster)
Code
database3 %>%
  group_by(cluster) %>%
  summarize(mean_score=mean(score),
            sd_score=sd(score),
            p25th=quantile(score,probs=0.25),
            median=quantile(score,probs=0.5),
            p75th=quantile(score,probs=0.75),
            count=n(),
            .groups = 'drop')
# A tibble: 7 × 7
  cluster   mean_score sd_score p25th median p75th count
  <fct>          <dbl>    <dbl> <dbl>  <dbl> <dbl> <int>
1 Cluster_1       63.2     3.67    60     63    66  5928
2 Cluster_2       63.9     3.98    61     63    67  4586
3 Cluster_3       69.4     5.42    66     70    73  3999
4 Cluster_4       66.0     4.91    62     66    70  7613
5 Cluster_5       67.4     5.10    63     67    71  4899
6 Cluster_6       64.7     4.46    61     64    68  5773
7 Cluster_7       65.4     4.41    62     65    68  3910

Normalizing

Code
db_individual_removed <- read.csv(here::here("db_individual_lowest_removed-CORRECTED.csv"))

db_individual_removed <- db_individual_removed %>%
  select(-ID)
Code
db_scaled <- data.frame(lapply(db_individual_removed,scale))
Code
individual_matrix <- as.matrix(db_scaled)
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 3)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 3 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1     0.230     -0.400       -0.471       0.243       0.649
2 Cluster_2    -0.290     -0.461       -0.449       0.199      -0.375
3 Cluster_3    -0.0605     0.798        0.872      -0.426      -0.489
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 3)

grids_km3$totss
[1] 440484
Code
grids_km3$withinss
[1]  84964.63 144453.23 121999.44
Code
grids_km3$betweenss
[1] 89066.7
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 4)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 4 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1      0.558    -0.217       -0.395      0.0921      -0.326
2 Cluster_2     -0.377    -0.477       -0.442      0.221       -0.363
3 Cluster_3     -0.249     1.06         1.15      -0.461       -0.462
4 Cluster_4     -0.169    -0.422       -0.302      0.175        1.30 
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 4)

grids_km3$totss
[1] 440484
Code
grids_km3$withinss
[1] 75773.09 83868.58 68995.63 98334.26
Code
grids_km3$betweenss
[1] 113512.4
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 5)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 5 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1      1.41    -0.0630       -0.424       0.256      -0.187
2 Cluster_2     -0.409   -0.483        -0.448       0.248      -0.357
3 Cluster_3     -0.302    1.14          1.20       -0.468      -0.457
4 Cluster_4     -0.217   -0.433        -0.303       0.196       1.41 
5 Cluster_5     -0.302   -0.294        -0.208      -0.120      -0.331
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 5)

grids_km3$totss
[1] 440484
Code
grids_km3$withinss
[1] 46251.37 61879.28 80191.68 56789.71 66088.32
Code
grids_km3$betweenss
[1] 129283.6
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 6)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 6 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1      1.58   -0.00761       -0.401     -0.0587      -0.245
2 Cluster_2     -0.406  -0.493         -0.462      0.258       -0.361
3 Cluster_3     -0.301   1.20           1.24      -0.520       -0.487
4 Cluster_4     -0.170  -0.405         -0.260     -0.233        1.42 
5 Cluster_5     -0.269  -0.247         -0.162     -0.398       -0.409
6 Cluster_6     -0.235  -0.322         -0.265      1.58         0.456
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 6)

grids_km3$totss
[1] 440484
Code
grids_km3$withinss
[1] 26449.07 55431.15 45414.34 48547.69 58705.05 67206.20
Code
grids_km3$betweenss
[1] 138730.5
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 7)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 7 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1      1.59    -0.0670       -0.495     -0.0385      -0.214
2 Cluster_2     -0.407   -0.521        -0.499      0.271       -0.349
3 Cluster_3     -0.282    0.859         1.05      -0.465       -0.315
4 Cluster_4     -0.173   -0.461        -0.333     -0.209        1.53 
5 Cluster_5     -0.240   -0.332        -0.324     -0.336       -0.365
6 Cluster_6     -0.224   -0.374        -0.343      1.62         0.537
7 Cluster_7     -0.161    1.16          1.16      -0.409       -0.668
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 7)

grids_km3$totss
[1] 440484
Code
grids_km3$withinss
[1] 37696.54 50293.45 36513.09 48488.26 51990.11 36633.54 30047.61
Code
grids_km3$betweenss
[1] 148821.4
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 8)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 8 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1     -0.210   -0.311        -0.245     -0.287       -0.301
2 Cluster_2     -0.111   -0.306        -0.293     -0.201       -0.260
3 Cluster_3     -0.278    0.928         1.08      -0.478       -0.302
4 Cluster_4     -0.165   -0.454        -0.336     -0.188        1.61 
5 Cluster_5     -0.224   -0.359        -0.324      1.71         0.564
6 Cluster_6     -0.174    1.23          1.20      -0.433       -0.676
7 Cluster_7     -0.416   -0.520        -0.502      0.290       -0.355
8 Cluster_8      1.69    -0.0506       -0.484     -0.0154      -0.213
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 8)

grids_km3$totss
[1] 440484
Code
grids_km3$withinss
[1] 49105.88 32733.17 33089.61 32935.30 27006.21 27630.43 37460.55 38537.18
Code
grids_km3$betweenss
[1] 161985.7
Code
set.seed(4)
km_spec3 <- k_means(num_clusters = 9)
grids_recipe <- recipe(~., data = individual_matrix)

km_wflow3 <- workflow() |>
  add_recipe(grids_recipe) |>
  add_model(km_spec3)

km_fitted3 <- km_wflow3 |> fit(individual_matrix)

km_fitted3 |> extract_centroids()
# A tibble: 9 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1    -0.195    -0.287        -0.201     -0.300       -0.294
2 Cluster_2    -0.0671   -0.291        -0.284     -0.213       -0.245
3 Cluster_3    -0.277     0.956         1.10      -0.477       -0.301
4 Cluster_4    -0.164    -0.450        -0.330     -0.192        1.63 
5 Cluster_5    -0.315    -0.405        -0.466      0.0971      -0.260
6 Cluster_6    -0.217    -0.363        -0.336      1.73         0.583
7 Cluster_7    -0.182     1.27          1.21      -0.453       -0.675
8 Cluster_8    -0.495    -0.684        -0.444      0.598       -0.537
9 Cluster_9     1.73     -0.0376       -0.457     -0.0274      -0.207
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
grids_km3 <- kmeans(individual_matrix, centers = 9)

grids_km3$totss
[1] 440484
Code
grids_km3$withinss
[1] 27946.96 28548.22 30640.87 24746.53 22405.00 31390.59 35361.58 23582.96
[9] 46064.03
Code
grids_km3$betweenss
[1] 169797.3
Code
# wcss <- data.frame(
#   k = rep(7, 7),
#   wcss = grids_km3$withinss
# )
Code
# wcss2 <- data.frame(
#   k = rep(9, 9),
#   wcss = grids_km3$withinss
# )
# 
# wcss <- rbind(wcss, wcss2)
Code
# write.csv(wcss, here::here("wcss-CORRECTED.csv"), row.names = FALSE)
Code
wcss <- read.csv(here::here("wcss-CORRECTED.csv"))
Code
wcss_means <- wcss %>%
  group_by(k) %>%
  summarize(mean = mean(wcss))
wcss_means
# A tibble: 7 × 2
      k    mean
  <int>   <dbl>
1     3 117139.
2     4  81743.
3     5  62240.
4     6  50292.
5     7  41666.
6     8  34812.
7     9  30076.
Code
ggplot(aes(x = k, y = wcss), data=wcss) +
  geom_point() +
  geom_point(aes(x=k, y=mean, color="red"), data=wcss_means) +
  labs(x = "K", y="Within Cluster Sums of Square") +
  theme_minimal() +
  theme(panel.grid.major.x = element_blank(),  # Remove vertical major grid lines
        panel.grid.minor.x = element_blank(),
        legend.position = "none")

Code
centroids <- km_fitted3 |> extract_centroids()
centroids
# A tibble: 9 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1    -0.195    -0.287        -0.201     -0.300       -0.294
2 Cluster_2    -0.0671   -0.291        -0.284     -0.213       -0.245
3 Cluster_3    -0.277     0.956         1.10      -0.477       -0.301
4 Cluster_4    -0.164    -0.450        -0.330     -0.192        1.63 
5 Cluster_5    -0.315    -0.405        -0.466      0.0971      -0.260
6 Cluster_6    -0.217    -0.363        -0.336      1.73         0.583
7 Cluster_7    -0.182     1.27          1.21      -0.453       -0.675
8 Cluster_8    -0.495    -0.684        -0.444      0.598       -0.537
9 Cluster_9     1.73     -0.0376       -0.457     -0.0274      -0.207
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
centroids <- km_fitted3 |> extract_centroids()
centroids
# A tibble: 9 × 13
  .cluster  bear_score bee_score meadow_score trout_score eagle_score
  <fct>          <dbl>     <dbl>        <dbl>       <dbl>       <dbl>
1 Cluster_1    -0.195    -0.287        -0.201     -0.300       -0.294
2 Cluster_2    -0.0671   -0.291        -0.284     -0.213       -0.245
3 Cluster_3    -0.277     0.956         1.10      -0.477       -0.301
4 Cluster_4    -0.164    -0.450        -0.330     -0.192        1.63 
5 Cluster_5    -0.315    -0.405        -0.466      0.0971      -0.260
6 Cluster_6    -0.217    -0.363        -0.336      1.73         0.583
7 Cluster_7    -0.182     1.27          1.21      -0.453       -0.675
8 Cluster_8    -0.495    -0.684        -0.444      0.598       -0.537
9 Cluster_9     1.73     -0.0376       -0.457     -0.0274      -0.207
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
#   fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
#   dv_score <dbl>
Code
database_lowest_removed <- database_new %>%
  filter(score > 57)
Code
# database4 <- database_lowest_removed %>%
#   mutate(`7cluster_seed4` = extract_cluster_assignment(km_fitted3)$.cluster)
Code
# database4 <- database4 %>%
#   mutate(`7cluster_seed5` = extract_cluster_assignment(km_fitted3)$.cluster)
Code
# seed <- database4 %>%
#   group_by(`7cluster_seed41`) %>%
#   summarize(mean_score=mean(score),
#             sd_score=sd(score),
#             .groups = 'drop') %>%
#   arrange(desc(mean_score))
# seed
Code
# write.csv(seed, here::here("7cluster_seed.csv"), row.names = FALSE)
Code
database_new %>% 
  summarize(mean_score=mean(score),
            sd_score=sd(score),
            min=quantile(score, probs=0),
            p25th=quantile(score,probs=0.25),
            median=quantile(score,probs=0.5),
            p75th=quantile(score,probs=0.75),
            max=quantile(score, probs=1)
            )
  mean_score sd_score min p25th median p75th max
1   61.67342 8.346453  17    57     63    67  88

For database

Code
database_lowest_removed <- database_new %>%
  filter(score > 57)
Code
# database3 <- database_lowest_removed %>%
#   mutate(`7cluster` = extract_cluster_assignment(km_fitted3)$.cluster)
Code
# database3 <- database3 %>%
#   mutate(`7cluster_s5` = extract_cluster_assignment(km_fitted3)$.cluster)
Code
# database3 %>%
#   group_by(`6cluster`) %>%
#   summarize(mean_score=mean(score),
#             sd_score=sd(score),
#             p25th=quantile(score,probs=0.25),
#             median=quantile(score,probs=0.5),
#             p75th=quantile(score,probs=0.75),
#             count=n(),
#             .groups = 'drop') %>%
#   arrange(desc(mean_score))
Code
# write.csv(database3, here::here("normalized_clusters-CORRECTED.csv"), row.names = FALSE)
Code
cluster_data <- read.csv(here::here("normalized_clusters-CORRECTED.csv"))
Code
sum((cluster_data %>% select(X6cluster)) == (cluster_data %>% select(X6cluster_s5)))
[1] 26815
Code
sum((cluster_data %>% select(X6cluster)) == (cluster_data %>% select(X6cluster_s6)))
[1] 18244
Code
sum((cluster_data %>% select(X6cluster_s5)) == (cluster_data %>% select(X6cluster_s6)))
[1] 16986
Code
nrow(cluster_data)
[1] 36708
Code
cluster_data %>%
  group_by(X7cluster) %>%
  summarize(mean_score=mean(score),
            sd_score=sd(score),
            p25th=quantile(score,probs=0.25),
            median=quantile(score,probs=0.5),
            p75th=quantile(score,probs=0.75),
            count=n(),
            .groups = 'drop')
# A tibble: 7 × 7
  X7cluster mean_score sd_score p25th median p75th count
  <chr>          <dbl>    <dbl> <dbl>  <dbl> <dbl> <int>
1 Cluster_1       65.3     4.62    62     65    68  5163
2 Cluster_2       64.1     4.04    61     64    67  6142
3 Cluster_3       68.6     5.28    65     69    72  6633
4 Cluster_4       66.6     5.13    63     66    70  5130
5 Cluster_5       63.3     3.74    60     63    66  6529
6 Cluster_6       66.3     4.73    63     66    70  3838
7 Cluster_7       64.6     4.71    61     64    68  3273

Inferences

Code
count_prop <- function(card_name, database){
  df_pos <- data.frame(
    row1col1 = as.numeric(nrow(database %>% filter(row1col1 == card_name))),
    row1col2 = as.numeric(nrow(database %>% filter(row1col2 == card_name))),
    row1col3 = as.numeric(nrow(database %>% filter(row1col3 == card_name))),
    row1col4 = as.numeric(nrow(database %>% filter(row1col4 == card_name))),
    row1col5 = as.numeric(nrow(database %>% filter(row1col5 == card_name))),
    row2col1 = as.numeric(nrow(database %>% filter(row2col1 == card_name))),
    row2col2 = as.numeric(nrow(database %>% filter(row2col2 == card_name))),
    row2col3 = as.numeric(nrow(database %>% filter(row2col3 == card_name))),
    row2col4 = as.numeric(nrow(database %>% filter(row2col4 == card_name))),
    row2col5 = as.numeric(nrow(database %>% filter(row2col5 == card_name))),
    row3col1 = as.numeric(nrow(database %>% filter(row3col1 == card_name))),
    row3col2 = as.numeric(nrow(database %>% filter(row3col2 == card_name))),
    row3col3 = as.numeric(nrow(database %>% filter(row3col3 == card_name))),
    row3col4 = as.numeric(nrow(database %>% filter(row3col4 == card_name))),
    row3col5 = as.numeric(nrow(database %>% filter(row3col5 == card_name))),
    row4col1 = as.numeric(nrow(database %>% filter(row4col1 == card_name))),
    row4col2 = as.numeric(nrow(database %>% filter(row4col2 == card_name))),
    row4col3 = as.numeric(nrow(database %>% filter(row4col3 == card_name))),
    row4col4 = as.numeric(nrow(database %>% filter(row4col4 == card_name))),
    row4col5 = as.numeric(nrow(database %>% filter(row4col5 == card_name)))
  )
  
  df_pos_per <- apply(df_pos, 1, function(x) x/sum(x))

  row <- c("row1", "row2", "row3", "row4")
  col <- c("col1", "col2", "col3", "col4", "col5")
  df_hm <- expand.grid(col = col, row = row)
  df_hm <- df_hm %>%
    mutate(proportion = df_pos_per[,1])
  
  return(df_hm)
  
}
Code
# bear_pos <- data.frame(
#   row1col1 = as.numeric(nrow(database_new %>% filter(row1col1 == "Bear"))),
#   row1col2 = as.numeric(nrow(database_new %>% filter(row1col2 == "Bear"))),
#   row1col3 = as.numeric(nrow(database_new %>% filter(row1col3 == "Bear"))),
#   row1col4 = as.numeric(nrow(database_new %>% filter(row1col4 == "Bear"))),
#   row1col5 = as.numeric(nrow(database_new %>% filter(row1col5 == "Bear"))),
#   row2col1 = as.numeric(nrow(database_new %>% filter(row2col1 == "Bear"))),
#   row2col2 = as.numeric(nrow(database_new %>% filter(row2col2 == "Bear"))),
#   row2col3 = as.numeric(nrow(database_new %>% filter(row2col3 == "Bear"))),
#   row2col4 = as.numeric(nrow(database_new %>% filter(row2col4 == "Bear"))),
#   row2col5 = as.numeric(nrow(database_new %>% filter(row2col5 == "Bear"))),
#   row3col1 = as.numeric(nrow(database_new %>% filter(row3col1 == "Bear"))),
#   row3col2 = as.numeric(nrow(database_new %>% filter(row3col2 == "Bear"))),
#   row3col3 = as.numeric(nrow(database_new %>% filter(row3col3 == "Bear"))),
#   row3col4 = as.numeric(nrow(database_new %>% filter(row3col4 == "Bear"))),
#   row3col5 = as.numeric(nrow(database_new %>% filter(row3col5 == "Bear"))),
#   row4col1 = as.numeric(nrow(database_new %>% filter(row4col1 == "Bear"))),
#   row4col2 = as.numeric(nrow(database_new %>% filter(row4col2 == "Bear"))),
#   row4col3 = as.numeric(nrow(database_new %>% filter(row4col3 == "Bear"))),
#   row4col4 = as.numeric(nrow(database_new %>% filter(row4col4 == "Bear"))),
#   row4col5 = as.numeric(nrow(database_new %>% filter(row4col5 == "Bear")))
# )
# 
# bear_pos_per <- apply(bear_pos, 1, function(x) x/sum(x))
# 
# row <- c("row1", "row2", "row3", "row4")
# col <- c("col1", "col2", "col3", "col4", "col5")
# bear_hm <- expand.grid(col = col, row = row)
# bear_hm <- bear_hm %>%
#   mutate(proportion = bear_pos_per[,1])
Code
make_heatmap <- function(df, title){
  ggplot(aes(x=col, y=row, fill=proportion), data=df) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  labs(title=title)
}
Code
# make_heatmap(count_prop("Bear", database_new), "Bear")
Code
# make_heatmap(count_prop("Bee", database_new), "Bee")
Code
# make_heatmap(count_prop("Meadow", database_new), "Meadow")
Code
# make_heatmap(count_prop("Trout", database_new), "Trout")
Code
# make_heatmap(count_prop("Eagle", database_new), "Eagle")
Code
# make_heatmap(count_prop("Rabbit", database_new), "Rabbit")
Code
# make_heatmap(count_prop("Dragonfly", database_new), "Dragonfly")
Code
# make_heatmap(count_prop("Fox", database_new), "Fox")
Code
# make_heatmap(count_prop("Deer", database_new), "Deer")
Code
# make_heatmap(count_prop("Stream", database_new), "Stream")
Code
# make_heatmap(count_prop("Wolf", database_new), "Wolf")
Code
bind_6clusters <- function(cluster_num){
  rbind(count_prop("Bear", 
           df <- cluster_data %>%
              filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Bear"),
        count_prop("Bee", 
           df <- cluster_data %>%
              filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Bee"),
        count_prop("Meadow", 
           df <- cluster_data %>%
              filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Meadow"),
        count_prop("Trout", 
           df <- cluster_data %>%
              filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Trout"),
        count_prop("Eagle", 
           df <- cluster_data %>%
              filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Eagle"),
        count_prop("Rabbit", 
           df <- cluster_data %>%
              filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Rabbit"),
        count_prop("Dragonfly", 
           df <- cluster_data %>%
              filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Dragonfly"),
        count_prop("Fox", 
           df <- cluster_data %>%
              filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Fox"),
        count_prop("Deer", 
           df <- cluster_data %>%
              filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Deer"),
        count_prop("Stream", 
           df <- cluster_data %>%
              filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Stream"),
        count_prop("Wolf", 
           df <- cluster_data %>%
              filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Wolf")
  )
}
Code
cluster_position <- rbind(bind_6clusters("Cluster_1"),
                          bind_6clusters("Cluster_2"),
                          bind_6clusters("Cluster_3"),
                          bind_6clusters("Cluster_4"),
                          bind_6clusters("Cluster_5"),
                          bind_6clusters("Cluster_6")
                    )
Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Bear"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Bear")

Code
overall_position <- rbind(count_prop("Bear", database_lowest_removed) %>% mutate(card = "Bear"),
                          count_prop("Bee", database_lowest_removed) %>% mutate(card = "Bee"),
                          count_prop("Meadow", database_lowest_removed) %>% mutate(card = "Meadow"),
                          count_prop("Trout", database_lowest_removed) %>% mutate(card = "Trout"),
                          count_prop("Eagle", database_lowest_removed) %>% mutate(card = "Eagle"),
                          count_prop("Rabbit", database_lowest_removed) %>% mutate(card = "Rabbit"),
                          count_prop("Dragonfly", database_lowest_removed) %>% mutate(card = "Dragonfly"),
                          count_prop("Fox", database_lowest_removed) %>% mutate(card = "Fox"),
                          count_prop("Deer", database_lowest_removed) %>% mutate(card = "Deer"),
                          count_prop("Stream", database_lowest_removed) %>% mutate(card = "Stream"),
                          count_prop("Wolf", database_lowest_removed) %>% mutate(card = "Wolf")
                          )
Code
ggplot(aes(x=col, y=row, fill=proportion), data=(overall_position)) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(card)) +
  labs(title="Bear positions across clusters")

Cluster Inferences

Code
cluster_table <- function(Xcluster, cluster_num, cluster_data){
  df <- cluster_data %>%
    filter(
      case_when(
        Xcluster == 7 ~ X7cluster == cluster_num,
        Xcluster == 6 ~ X6cluster == cluster_num,
        Xcluster == 5 ~ X5cluster == cluster_num,
        Xcluster == 4 ~ X4cluster == cluster_num,
        Xcluster == 3 ~ X3cluster == cluster_num
      )
    ) %>%
    summarize(mean_score=mean(score),
              sd_score=sd(score),
              p25th=quantile(score,probs=0.25),
              median=quantile(score,probs=0.5),
              p75th=quantile(score,probs=0.75),
              count=n(),
              .groups = 'drop')
  return(df)
}
Code
cluster_prop <- function(Xcluster, cluster_num, database){
  df <- database %>%
    filter(
      case_when(
        Xcluster == 7 ~ X7cluster == cluster_num,
        Xcluster == 6 ~ X6cluster == cluster_num,
        Xcluster == 5 ~ X5cluster == cluster_num,
        Xcluster == 4 ~ X4cluster == cluster_num,
        Xcluster == 3 ~ X3cluster == cluster_num,
        Xcluster == 8 ~ X8cluster == cluster_num,
        Xcluster == 65 ~ X6cluster_s5 == cluster_num,
        Xcluster == 66 ~ X6cluster_s6 == cluster_num,
        Xcluster == 85 ~ X8cluster_s5 == cluster_num,
        Xcluster == 75 ~ X7cluster_s5 == cluster_num
      )
    )
  
  bear_count = 0
  bee_count = 0
  meadow_count = 0
  trout_count = 0
  eagle_count = 0
  rabbit_count = 0
  dragonfly_count = 0
  fox_count = 0
  deer_count = 0
  stream_count = 0
  wolf_count = 0
      
  for (j in 2:21){
    for(i in 1:nrow(df)){
      
      if(df[i,j] == "Bear"){
        bear_count = bear_count + 1
      }else if(df[i,j] == "Bee"){
        bee_count = bee_count + 1
      }else if(df[i,j] == "Meadow"){
        meadow_count = meadow_count + 1
      }else if(df[i,j] == "Trout"){
        trout_count = trout_count + 1
      }else if(df[i,j] == "Eagle"){
        eagle_count = eagle_count + 1
      }else if(df[i,j] == "Rabbit"){
        rabbit_count = rabbit_count + 1
      }else if(df[i,j] == "Dragonfly"){
        dragonfly_count = dragonfly_count + 1
      }else if(df[i,j] == "Fox"){
        fox_count = fox_count + 1
      }else if(df[i,j] == "Deer"){
        deer_count = deer_count + 1
      }else if(df[i,j] == "Stream"){
        stream_count = stream_count + 1
      }else if(df[i,j] == "Wolf"){
        wolf_count = wolf_count + 1
      }else{
        print("bugged")
      }
      
    }
  }
  
  df_prop <- data.frame(
    name = c("bear", "bee", "meadow", "trout", "eagle", "rabbit",
               "dragonfly", "fox", "deer", "stream", "wolf"),
    proportion = c(bear_count/(nrow(df)*20),
                bee_count/(nrow(df)*20),
                meadow_count/(nrow(df)*20),
                trout_count/(nrow(df)*20),
                eagle_count/(nrow(df)*20),
                rabbit_count/(nrow(df)*20),
                dragonfly_count/(nrow(df)*20),
                fox_count/(nrow(df)*20),
                deer_count/(nrow(df)*20),
                stream_count/(nrow(df)*20),
                wolf_count/(nrow(df)*20)),
    true_prop = c(12/130,
                  8/130,
                  20/130,
                  10/130,
                  8/130,
                  8/130,
                  8/130,
                  12/130,
                  12/130,
                  20/130,
                  12/130)
  )
  
  return(df_prop)
}
Code
make_bars <- function(df, title){
  ggplot(aes(x = reorder(name, -proportion), y = proportion, fill = reorder(name, -proportion)), data=df) + 
    geom_bar(stat = "identity") +
    scale_fill_brewer(palette="PRGn", direction = -1) +
    labs(x = "card", title=title) +
    theme(legend.position = "none")
}
Code
exact_card_count <- function(Xcluster=NULL, cluster_num=NULL, database){

  bear_exact <- rep(0, 13)
  bee_exact <- rep(0, 9)
  meadow_exact <- rep(0, 21)
  trout_exact <- rep(0, 11)
  eagle_exact <- rep(0, 9)
  rabbit_exact <- rep(0, 9)
  dragonfly_exact <- rep(0, 9)
  fox_exact <- rep(0, 13)
  deer_exact <- rep(0, 13)
  stream_exact <- rep(0, 21)
  wolf_exact <- rep(0, 13)
  
  if(is.null(Xcluster) && is.null(cluster_num)){
    df <- database %>%
      filter(pool == "default")
  }else{
    df <- database %>%
    filter(
      case_when(
        Xcluster == 7 ~ X7cluster == cluster_num,
        Xcluster == 6 ~ X6cluster == cluster_num,
        Xcluster == 5 ~ X5cluster == cluster_num,
        Xcluster == 4 ~ X4cluster == cluster_num,
        Xcluster == 3 ~ X3cluster == cluster_num
      )
    )
  }
  
  
  for(i in 1:nrow(df)){
    bear_count <- 0
    bee_count <- 0
    meadow_count <- 0
    trout_count <- 0
    eagle_count <- 0
    rabbit_count <- 0
    dragonfly_count <- 0
    fox_count <- 0
    deer_count <- 0
    stream_count <- 0
    wolf_count <- 0
    
    for(j in 2:21){
      if(df[i,j] == "Bear"){
        bear_count = bear_count + 1
      }else if(df[i,j] == "Bee"){
        bee_count = bee_count + 1
      }else if(df[i,j] == "Meadow"){
        meadow_count = meadow_count + 1
      }else if(df[i,j] == "Trout"){
        trout_count = trout_count + 1
      }else if(df[i,j] == "Eagle"){
        eagle_count = eagle_count + 1
      }else if(df[i,j] == "Rabbit"){
        rabbit_count = rabbit_count + 1
      }else if(df[i,j] == "Dragonfly"){
        dragonfly_count = dragonfly_count + 1
      }else if(df[i,j] == "Fox"){
        fox_count = fox_count + 1
      }else if(df[i,j] == "Deer"){
        deer_count = deer_count + 1
      }else if(df[i,j] == "Stream"){
        stream_count = stream_count + 1
      }else if(df[i,j] == "Wolf"){
        wolf_count = wolf_count + 1
      }else{
        print("bugged")
      }
    }
    
    bear_exact[bear_count+1] = bear_exact[bear_count+1] + 1
    bee_exact[bee_count+1] = bee_exact[bee_count+1] + 1
    meadow_exact[meadow_count+1] = meadow_exact[meadow_count+1] + 1
    trout_exact[trout_count+1] = trout_exact[trout_count+1] + 1
    eagle_exact[eagle_count+1] = eagle_exact[eagle_count+1] + 1
    rabbit_exact[rabbit_count+1] = rabbit_exact[rabbit_count+1] + 1
    dragonfly_exact[dragonfly_count+1] = dragonfly_exact[dragonfly_count+1] + 1
    fox_exact[fox_count+1] = fox_exact[fox_count+1] + 1
    deer_exact[deer_count+1] = deer_exact[deer_count+1] + 1
    stream_exact[stream_count+1] = stream_exact[stream_count+1] + 1
    wolf_exact[wolf_count+1] = wolf_exact[wolf_count+1] + 1
    
  }
  
  bear_exact[8] = bear_exact[8]+bear_exact[9]+bear_exact[10]+
    bear_exact[11]+bear_exact[12]+bear_exact[13]
  
  bee_exact[8] = bee_exact[8]+bee_exact[9]
  
  meadow_exact[8] = meadow_exact[8]+meadow_exact[9]+meadow_exact[10]+
    meadow_exact[11]+meadow_exact[12]+meadow_exact[13]+meadow_exact[14]+
    meadow_exact[15]+meadow_exact[16]+meadow_exact[17]+meadow_exact[18]+
    meadow_exact[19]+meadow_exact[20]+meadow_exact[21]
  
  trout_exact[8] = trout_exact[8]+trout_exact[9]+trout_exact[10]+
    trout_exact[11]
  
  eagle_exact[8] = eagle_exact[8]+eagle_exact[9]
  
  rabbit_exact[8] = rabbit_exact[8]+rabbit_exact[9]
  
  dragonfly_exact[8] = dragonfly_exact[8]+dragonfly_exact[9]
  
  fox_exact[8] = fox_exact[8]+fox_exact[9]+fox_exact[10]+
    fox_exact[11]+fox_exact[12]+fox_exact[13]
  
  deer_exact[8] = deer_exact[8]+deer_exact[9]+deer_exact[10]+
    deer_exact[11]+deer_exact[12]+deer_exact[13]
  
  stream_exact[8] = stream_exact[8]+stream_exact[9]+stream_exact[10]+
    stream_exact[11]+stream_exact[12]+stream_exact[13]+stream_exact[14]+
    stream_exact[15]+stream_exact[16]+stream_exact[17]+stream_exact[18]+
    stream_exact[19]+stream_exact[20]+stream_exact[21]
  
  wolf_exact[8] = wolf_exact[8]+wolf_exact[9]+wolf_exact[10]+
    wolf_exact[11]+wolf_exact[12]+wolf_exact[13]
  
  bear_exact = bear_exact[1:8]
  bee_exact = bee_exact[1:8]
  meadow_exact = meadow_exact[1:8]
  trout_exact = trout_exact[1:8]
  eagle_exact = eagle_exact[1:8]
  rabbit_exact = rabbit_exact[1:8]
  dragonfly_exact = dragonfly_exact[1:8]
  fox_exact = fox_exact[1:8]
  deer_exact = deer_exact[1:8]
  stream_exact = stream_exact[1:8]
  wolf_exact = wolf_exact[1:8]
  
  result <- data.frame(
    card = c(rep("Bear", 8), 
             rep("Bee", 8),
             rep("Meadow", 8),
             rep("Trout", 8),
             rep("Eagle", 8),
             rep("Rabbit", 8),
             rep("Dragonfly", 8),
             rep("Fox", 8),
             rep("Deer", 8),
             rep("Stream", 8),
             rep("Wolf", 8)
             ),
    num_exact = c(seq(0,7),
                  seq(0,7),
                  seq(0,7),
                  seq(0,7),
                  seq(0,7),
                  seq(0,7),
                  seq(0,7),
                  seq(0,7),
                  seq(0,7),
                  seq(0,7),
                  seq(0,7)
                  ),
    proportion = c(bear_exact/(nrow(df)),
              bee_exact/(nrow(df)),
              meadow_exact/(nrow(df)),
              trout_exact/(nrow(df)),
              eagle_exact/(nrow(df)),
              rabbit_exact/(nrow(df)),
              dragonfly_exact/(nrow(df)),
              fox_exact/(nrow(df)),
              deer_exact/(nrow(df)),
              stream_exact/(nrow(df)),
              wolf_exact/(nrow(df))
              )
    
  )
  
  return(result)
}
Code
make_exact_bars <- function(df, title){
  ggplot(aes(x = factor(num_exact),
           y = proportion,
           fill = factor(num_exact)),
         data =
           (df %>%
              filter(proportion != 0))
        ) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  facet_wrap(vars(card),scales = "free_x") +
  labs(x="Exact number of cards",
       fill="Exact number of cards",
       title=title)
}

7cluster Overall Proportions

Code
nrow(database_new %>% filter(pool == "default"))
[1] 30000
Code
default_pool_props <- exact_card_count(database=database_new)
Code
ggplot(aes(x = factor(num_exact),
           y = proportion,
           fill = factor(num_exact)),
         data =
           (default_pool_props %>%
              filter(proportion != 0))
        ) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  facet_wrap(vars(card)) +
  labs(x="Exact number of cards",
       fill="Exact number of cards",
       title="Overall proportions of the default pool")

Cluster 7

Summary statistics for cluster 7

Code
# cluster_table(7, "Cluster_7", cluster_data)

Proportion of each card type out of all the cards in the cluster

Code
# make_bars(cluster_prop(7, "Cluster_7", cluster_data), "7Cluster: Cluster_7")

Distributions of the exact number of each card type out of all grids in the cluster

Code
# make_exact_bars(exact_card_count(7, "Cluster_7", cluster_data), title="7Cluster: Cluster_7")

Cluster 6

Code
# cluster_table(7, "Cluster_6", cluster_data)
Code
# make_bars(cluster_prop(7, "Cluster_6", cluster_data), "7Cluster: Cluster_6")
Code
# make_exact_bars(exact_card_count(7, "Cluster_6", cluster_data), title="7Cluster: Cluster_6")

Cluster 3

Code
# cluster_table(7, "Cluster_3", cluster_data)
Code
# make_bars(cluster_prop(7, "Cluster_3", cluster_data), "7Cluster: Cluster_3")
Code
# make_exact_bars(exact_card_count(7, "Cluster_3", cluster_data), title="7Cluster: Cluster_3")

Cluster 4

Code
# cluster_table(7, "Cluster_4", cluster_data)
Code
# make_bars(cluster_prop(7, "Cluster_4", cluster_data), "7Cluster: Cluster_4")
Code
# make_exact_bars(exact_card_count(7, "Cluster_4", cluster_data), title="7Cluster: Cluster_4")

Cluster 2

Code
# cluster_table(7, "Cluster_2", cluster_data)
Code
# make_bars(cluster_prop(7, "Cluster_2", cluster_data), "7Cluster: Cluster_2")
Code
# make_exact_bars(exact_card_count(7, "Cluster_2", cluster_data), title="7Cluster: Cluster_2")

Cluster 1

Code
# cluster_table(7, "Cluster_1", cluster_data)
Code
# make_bars(cluster_prop(7, "Cluster_1", cluster_data), "7Cluster: Cluster_1")
Code
# make_exact_bars(exact_card_count(7, "Cluster_1", cluster_data), title="7Cluster: Cluster_1")

Cluster 5

Code
# cluster_table(7, "Cluster_5", cluster_data)
Code
# make_bars(cluster_prop(7, "Cluster_5", cluster_data), "7Cluster: Cluster_5")
Code
# make_exact_bars(exact_card_count(7, "Cluster_5", cluster_data), title="7Cluster: Cluster_5")

Additional Displays

Code
overall_position <- overall_position %>%
    mutate(
      card = factor(card, levels = c(
      "Meadow", "Stream", "Bear", 
      "Deer", "Fox", "Wolf", "Trout", "Bee", 
      "Dragonfly", "Eagle", "Rabbit"
    )))
Code
ggplot(aes(x=col, y=row, fill=proportion), data=(overall_position)) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(card)) +
  labs(title="Positions across all 50,000")

Code
# all_cluster_prop7_s5 <- rbind(cluster_prop(75, "Cluster_1", cluster_data) %>% mutate(cluster = "Cluster_1"),
#                           cluster_prop(75, "Cluster_2", cluster_data) %>% mutate(cluster = "Cluster_2"),
#                           cluster_prop(75, "Cluster_3", cluster_data) %>% mutate(cluster = "Cluster_3"),
#                           cluster_prop(75, "Cluster_4", cluster_data) %>% mutate(cluster = "Cluster_4"),
#                           cluster_prop(75, "Cluster_5", cluster_data) %>% mutate(cluster = "Cluster_5"),
#                           cluster_prop(75, "Cluster_6", cluster_data) %>% mutate(cluster = "Cluster_6"),
#                           cluster_prop(75, "Cluster_7", cluster_data) %>% mutate(cluster = "Cluster_7")
#                           )
Code
# all_exact <- rbind(exact_card_count(6, "Cluster_1", cluster_data) %>% mutate(cluster = "Cluster_1"),
#                    exact_card_count(6, "Cluster_2", cluster_data) %>% mutate(cluster = "Cluster_2"),
#                    exact_card_count(6, "Cluster_3", cluster_data) %>% mutate(cluster = "Cluster_3"),
#                    exact_card_count(6, "Cluster_4", cluster_data) %>% mutate(cluster = "Cluster_4"),
#                    exact_card_count(6, "Cluster_5", cluster_data) %>% mutate(cluster = "Cluster_5"),
#                    exact_card_count(6, "Cluster_6", cluster_data) %>% mutate(cluster = "Cluster_6")
# )
Code
# write.csv(all_exact, here::here("all_exact-CORRECTED.csv"), row.names = FALSE)
Code
# write.csv(all_cluster_prop, here::here("cluster_prop6-CORRECTED.csv"), row.names = FALSE)
Code
cluster_position_renamed <- cluster_position %>%
  mutate(
      cluster = factor(cluster, levels = c(
      "Cluster_1", "Cluster_2", "Cluster_3",
      "Cluster_4", "Cluster_5", "Cluster_6"
    ))) %>%
    mutate(cluster = fct_recode(cluster,
      "Bear Trout Bee" = "Cluster_1",
      "Stream Dragonfly" = "Cluster_2",
      "Meadow Bee" = "Cluster_3",
      "Eagle Rabbit" = "Cluster_4",
      "Deer Fox" = "Cluster_5",
      "Elevated Trout" = "Cluster_6"
    ))
Code
all_cluster_prop <- read.csv(here::here("cluster_prop6-CORRECTED.csv")) %>%
  mutate(cluster = factor(cluster, levels = paste0("Cluster_", 1:6)))
Code
all_cluster_prop <- all_cluster_prop %>%
  mutate(
    name = case_when(
      name == "bear" ~ "Bear",
      name == "bee" ~ "Bee",
      name == "meadow" ~ "Meadow",
      name == "stream" ~ "Stream",
      name == "dragonfly" ~ "Dragonfly",
      name == "eagle" ~ "Eagle",
      name == "rabbit" ~ "Rabbit",
      name == "fox" ~ "Fox",
      name == "deer" ~ "Deer",
      name == "trout" ~ "Trout",
      name == "wolf" ~ "Wolf",
    )
  )
Code
all_cluster_prop_renamed <- all_cluster_prop %>%
    mutate(
      cluster = factor(cluster, levels = c(
      "Cluster_1", "Cluster_2", "Cluster_3", 
      "Cluster_4", "Cluster_5", "Cluster_6"
    ))) %>%
    mutate(cluster = fct_recode(cluster,
      "Bear Trout Bee" = "Cluster_1",
      "Stream Dragonfly" = "Cluster_2",
      "Meadow Bee" = "Cluster_3",
      "Eagle Rabbit" = "Cluster_4",
      "Deer Fox" = "Cluster_5",
      "Elevated Trout" = "Cluster_6"
    ))
Code
all_exact <- read.csv(here::here("all_exact-CORRECTED.csv")) 
Code
all_exact_renamed <- all_exact %>%
    mutate(
      cluster = factor(cluster, levels = c(
      "Cluster_1", "Cluster_2", "Cluster_3", 
      "Cluster_4", "Cluster_5", "Cluster_6"
    ))) %>%
    mutate(cluster = fct_recode(cluster,
      "Bear Trout Bee" = "Cluster_1",
      "Stream Dragonfly" = "Cluster_2",
      "Meadow Bee" = "Cluster_3",
      "Eagle Rabbit" = "Cluster_4",
      "Deer Fox" = "Cluster_5",
      "Elevated Trout" = "Cluster_6"
    ))
Code
cluster_data_renamed <- cluster_data %>%
    mutate(
      X6cluster = factor(X6cluster, levels = c(
      "Cluster_1", "Cluster_2", "Cluster_3", 
      "Cluster_4", "Cluster_5", "Cluster_6"
    ))) %>%
    mutate(X6cluster = fct_recode(X6cluster,
      "Bear Trout Bee" = "Cluster_1",
      "Stream Dragonfly" = "Cluster_2",
      "Meadow Bee" = "Cluster_3",
      "Eagle Rabbit" = "Cluster_4",
      "Deer Fox" = "Cluster_5",
      "Elevated Trout" = "Cluster_6"
    ))
Code
results_table <- cluster_data_renamed %>%
  group_by(X6cluster) %>%
  summarize(mean_score=mean(score),
            sd_score=sd(score),
            p25th=quantile(score,probs=0.25),
            median=quantile(score,probs=0.5),
            p75th=quantile(score,probs=0.75),
            count=n(),
            .groups = 'drop') %>%
  arrange(desc(mean_score))
results_table
# A tibble: 6 × 7
  X6cluster        mean_score sd_score p25th median p75th count
  <fct>                 <dbl>    <dbl> <dbl>  <dbl> <dbl> <int>
1 Meadow Bee             67.8     5.35    64     68    72  7315
2 Eagle Rabbit           66.6     5.18    62     66    70  5852
3 Elevated Trout         66.4     4.82    63     66    70  4265
4 Bear Trout Bee         65.4     4.73    62     65    69  5560
5 Stream Dragonfly       64.1     4.08    61     64    67  6448
6 Deer Fox               63.5     3.91    60     63    66  7268
Code
# write.csv(results_table, here::here("results_table-CORRECTED.csv"), row.names = FALSE)

7 cluster

Code
# write.csv(all_cluster_prop7, here::here("cluster_prop7-CORRECTED.csv"), row.names = FALSE)
Code
all_cluster_prop7 <- read.csv(here::here("cluster_prop7-CORRECTED.csv")) %>%
  mutate(cluster = factor(cluster, levels = paste0("Cluster_", 1:7)))
Code
# cluster_data_renamed %>%
#   group_by(X7cluster) %>%
#   summarize(mean_score=mean(score),
#             sd_score=sd(score),
#             p25th=quantile(score,probs=0.25),
#             median=quantile(score,probs=0.5),
#             p75th=quantile(score,probs=0.75),
#             count=n(),
#             .groups = 'drop') %>%
#   arrange(desc(mean_score))
Code
ggplot(aes(x = reorder(name, -true_prop), 
           y = proportion, 
           fill = reorder(name, -true_prop)), 
       data=all_cluster_prop7) + 
  geom_bar(stat = "identity") +
  geom_point(aes(x = reorder(name, -true_prop), y= true_prop),
             color = "red",
             data=all_cluster_prop7,
             show.legend = FALSE) +
  scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
                    "#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
  labs(x = "Card", y = "Proportion", fill="Card Type", title="Propotion of card types in each cluster") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
        panel.grid.major = element_line(color = "black"),
        panel.grid.minor = element_line(color = "black"),
        panel.grid.major.x = element_blank(),  # Remove vertical major grid lines
        panel.grid.minor.x = element_blank()) +
  facet_wrap(vars(cluster))

7 clust s5

Code
# write.csv(all_cluster_prop7_s5, here::here("cluster_prop7_seed5-CORRECTED.csv"), row.names = FALSE)
Code
all_cluster_prop7_s5 <- read.csv(here::here("cluster_prop7_seed5-CORRECTED.csv")) %>%
  mutate(cluster = factor(cluster, levels = paste0("Cluster_", 1:7)))
Code
ggplot(aes(x = reorder(name, -true_prop), 
           y = proportion, 
           fill = reorder(name, -true_prop)), 
       data=all_cluster_prop7_s5) + 
  geom_bar(stat = "identity") +
  geom_point(aes(x = reorder(name, -true_prop), y= true_prop),
             color = "red",
             data=all_cluster_prop7_s5,
             show.legend = FALSE) +
  scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
                    "#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
  labs(x = "Card", y = "Proportion", fill="Card Type", title="Propotion of card types in each cluster") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
        panel.grid.major = element_line(color = "black"),
        panel.grid.minor = element_line(color = "black"),
        panel.grid.major.x = element_blank(),  # Remove vertical major grid lines
        panel.grid.minor.x = element_blank()) +
  facet_wrap(vars(cluster))

6 cluster

Code
# write.csv(all_cluster_prop6, here::here("cluster_prop6-CORRECTED.csv"), row.names = FALSE)
Code
all_cluster_prop6 <- read.csv(here::here("cluster_prop6-CORRECTED.csv")) %>%
  mutate(cluster = factor(cluster, levels = paste0("Cluster_", 1:6)))
Code
ggplot(aes(x = reorder(name, -true_prop), 
           y = proportion, 
           fill = reorder(name, -true_prop)), 
       data=all_cluster_prop) + 
  geom_bar(stat = "identity") +
  geom_point(aes(x = reorder(name, -true_prop), y= true_prop),
             color = "red",
             data=all_cluster_prop,
             show.legend = FALSE) +
  scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
                    "#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
  labs(x = "Card", y = "Proportion", fill="Card Type", title="Propotion of card types in each cluster") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
        panel.grid.major = element_line(color = "black"),
        panel.grid.minor = element_line(color = "black"),
        panel.grid.major.x = element_blank(),  # Remove vertical major grid lines
        panel.grid.minor.x = element_blank()) +
  facet_wrap(vars(cluster))

6 clust alt

Code
ggplot(aes(x = reorder(name, -true_prop), 
           y = proportion, 
           fill = reorder(name, -true_prop)), 
       data=all_cluster_prop) + 
  geom_bar(stat = "identity") +
  geom_tile(
    aes(
      x = reorder(name, -true_prop),
      y = true_prop,
      width = 1,  
      height = 0  
    ),
    color = "red",
    fill = "red",
    linetype = "solid",
    linewidth = 0.7,
    alpha = 0.7
  ) +
  scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
                    "#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
  labs(x = "Card", y = "Proportion", fill="Card Type", title="Propotion of card types in each cluster") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
        panel.grid.major = element_line(color = "black"),
        panel.grid.minor = element_line(color = "black"),
        panel.grid.major.x = element_blank(),  # Remove vertical major grid lines
        panel.grid.minor.x = element_blank()) +
  facet_wrap(vars(cluster))

6 clust s5

Code
# write.csv(all_cluster_prop6_s5, here::here("cluster_prop6_seed5-CORRECTED.csv"), row.names = FALSE)
Code
all_cluster_prop6_s5 <- read.csv(here::here("cluster_prop6_seed5-CORRECTED.csv")) %>%
  mutate(cluster = factor(cluster, levels = paste0("Cluster_", 1:6)))
Code
ggplot(aes(x = reorder(name, -true_prop), 
           y = proportion, 
           fill = reorder(name, -true_prop)), 
       data=all_cluster_prop6_s5) + 
  geom_bar(stat = "identity") +
  geom_point(aes(x = reorder(name, -true_prop), y= true_prop),
             color = "red",
             data=all_cluster_prop6_s5,
             show.legend = FALSE) +
  scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
                    "#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
  labs(x = "Card", y = "Proportion", fill="Card Type", title="Propotion of card types in each cluster") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
        panel.grid.major = element_line(color = "black"),
        panel.grid.minor = element_line(color = "black"),
        panel.grid.major.x = element_blank(),  # Remove vertical major grid lines
        panel.grid.minor.x = element_blank()) +
  facet_wrap(vars(cluster))

6 clust s6

Code
# write.csv(all_cluster_prop6_s6, here::here("cluster_prop6_seed6-CORRECTED.csv"), row.names = FALSE)
Code
all_cluster_prop6_s6 <- read.csv(here::here("cluster_prop6_seed6-CORRECTED.csv")) %>%
  mutate(cluster = factor(cluster, levels = paste0("Cluster_", 1:6)))
Code
ggplot(aes(x = reorder(name, -true_prop), 
           y = proportion, 
           fill = reorder(name, -true_prop)), 
       data=all_cluster_prop6_s6) + 
  geom_bar(stat = "identity") +
  geom_point(aes(x = reorder(name, -true_prop), y= true_prop),
             color = "red",
             data=all_cluster_prop6_s6,
             show.legend = FALSE) +
  scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
                    "#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
  labs(x = "Card", y = "Proportion", fill="Card Type", title="Propotion of card types in each cluster") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
        panel.grid.major = element_line(color = "black"),
        panel.grid.minor = element_line(color = "black"),
        panel.grid.major.x = element_blank(),  # Remove vertical major grid lines
        panel.grid.minor.x = element_blank()) +
  facet_wrap(vars(cluster))

5 cluster

Code
# write.csv(all_cluster_prop5, here::here("cluster_prop5-CORRECTED.csv"), row.names = FALSE)
Code
all_cluster_prop5 <- read.csv(here::here("cluster_prop5-CORRECTED.csv")) %>%
  mutate(cluster = factor(cluster, levels = paste0("Cluster_", 1:5)))
Code
ggplot(aes(x = reorder(name, -true_prop), 
           y = proportion, 
           fill = reorder(name, -true_prop)), 
       data=all_cluster_prop5) + 
  geom_bar(stat = "identity") +
  geom_point(aes(x = reorder(name, -true_prop), y= true_prop),
             color = "red",
             data=all_cluster_prop5,
             show.legend = FALSE) +
  scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
                    "#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
  labs(x = "Card", y = "Proportion", fill="Card Type", title="Propotion of card types in each cluster") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
        panel.grid.major = element_line(color = "black"),
        panel.grid.minor = element_line(color = "black"),
        panel.grid.major.x = element_blank(),  # Remove vertical major grid lines
        panel.grid.minor.x = element_blank()) +
  facet_wrap(vars(cluster))

8 cluster

Code
# write.csv(all_cluster_prop8, here::here("cluster_prop8-CORRECTED.csv"), row.names = FALSE)
Code
all_cluster_prop8 <- read.csv(here::here("cluster_prop8-CORRECTED.csv")) %>%
  mutate(cluster = factor(cluster, levels = paste0("Cluster_", 1:8)))
Code
ggplot(aes(x = reorder(name, -true_prop), 
           y = proportion, 
           fill = reorder(name, -true_prop)), 
       data=all_cluster_prop8) + 
  geom_bar(stat = "identity") +
  geom_point(aes(x = reorder(name, -true_prop), y= true_prop),
             color = "red",
             data=all_cluster_prop8,
             show.legend = FALSE) +
  scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
                    "#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
  labs(x = "Card", y = "Proportion", fill="Card Type", title="Propotion of card types in each cluster") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
        panel.grid.major = element_line(color = "black"),
        panel.grid.minor = element_line(color = "black"),
        panel.grid.major.x = element_blank(),  # Remove vertical major grid lines
        panel.grid.minor.x = element_blank()) +
  facet_wrap(vars(cluster))

8 clust s5

Code
# write.csv(all_cluster_prop8_s5, here::here("cluster_prop8_seed5-CORRECTED.csv"), row.names = FALSE)
Code
all_cluster_prop8_s5 <- read.csv(here::here("cluster_prop8_seed5-CORRECTED.csv")) %>%
  mutate(cluster = factor(cluster, levels = paste0("Cluster_", 1:8)))
Code
ggplot(aes(x = reorder(name, -true_prop), 
           y = proportion, 
           fill = reorder(name, -true_prop)), 
       data=all_cluster_prop8_s5) + 
  geom_bar(stat = "identity") +
  geom_point(aes(x = reorder(name, -true_prop), y= true_prop),
             color = "red",
             data=all_cluster_prop8_s5,
             show.legend = FALSE) +
  scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
                    "#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
  labs(x = "Card", y = "Proportion", fill="Card Type", title="Propotion of card types in each cluster") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
        panel.grid.major = element_line(color = "black"),
        panel.grid.minor = element_line(color = "black"),
        panel.grid.major.x = element_blank(),  # Remove vertical major grid lines
        panel.grid.minor.x = element_blank()) +
  facet_wrap(vars(cluster))

Results

Code
ggplot(aes(x = reorder(name, -true_prop), 
           y = proportion, 
           fill = reorder(name, -true_prop)), 
       data=all_cluster_prop_renamed) + 
  geom_bar(stat = "identity") +
  geom_tile(
    aes(
      x = reorder(name, -true_prop),
      y = true_prop,
      width = 1,  
      height = 0  
    ),
    color = "red",
    fill = "red",
    linetype = "solid",
    linewidth = 0.7,
    alpha = 0.7
  ) +
  scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
                    "#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
  labs(x = "Card", y="Proportion", fill="Card type", title="Propotion of card types in each cluster") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
        panel.grid.major = element_line(color = "black"),
        panel.grid.minor = element_line(color = "black"),
        panel.grid.major.x = element_blank(),  # Remove vertical major grid lines
        panel.grid.minor.x = element_blank()) +
  facet_wrap(vars(cluster))

Code
all_cluster_prop_renamed2 <- all_cluster_prop_renamed %>%
  mutate(prop_diff = proportion-true_prop)
Code
ggplot(aes(x = reorder(name, -true_prop), 
           y = prop_diff, 
           fill = reorder(name, -true_prop)), 
       data=all_cluster_prop_renamed2) + 
  geom_bar(stat = "identity") +
  scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
                    "#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
  labs(x = "Card", y="Difference in Proportion", fill="Card type", title="Propotion of card types in each cluster") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
        panel.grid.major = element_line(color = "black"),
        panel.grid.minor = element_line(color = "black"),
        panel.grid.major.x = element_blank(),  # Remove vertical major grid lines
        panel.grid.minor.x = element_blank()) +
  facet_wrap(vars(cluster))

Code
ggplot(aes(x = cluster, 
           y = proportion, 
           fill = cluster), 
       data=all_cluster_prop_renamed) + 
  geom_bar(stat = "identity") +
  scale_fill_manual(values = c("#000000", "#377eb8", "#4daf4a", "#984ea3", "#a65628",
                                "#f781bf")) +
  labs(x = "Card", y="Proportion", fill="Card type", title="Propotion of card types in each cluster") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
        panel.grid.major = element_line(color = "black"),
        panel.grid.minor = element_line(color = "black"),
        panel.grid.major.x = element_blank(),  # Remove vertical major grid lines
        panel.grid.minor.x = element_blank()) +
  facet_wrap(vars(reorder(name, -true_prop)))

Code
ggplot(aes(x = cluster, 
           y = prop_diff, 
           fill = cluster), 
       data=all_cluster_prop_renamed2) + 
  geom_bar(stat = "identity") +
  scale_fill_manual(values = c("#000000", "#377eb8", "#4daf4a", "#984ea3", "#a65628",
                                "#f781bf")) +
  labs(x = "Card", y="Difference in Proportion", fill="Card type", title="Propotion of card types in each cluster") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
        panel.grid.major = element_line(color = "black"),
        panel.grid.minor = element_line(color = "black"),
        panel.grid.major.x = element_blank(),  # Remove vertical major grid lines
        panel.grid.minor.x = element_blank()) +
  facet_wrap(vars(reorder(name, -true_prop)))

Code
ggplot(all_cluster_prop_renamed, aes(x = proportion, y = cluster, color = reorder(name, -true_prop))) +
  geom_point(
    size = 3,
    alpha = 0.8,
    position = position_jitter(height = 0.2, seed = 110)
  ) +
  scale_color_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
                    "#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
  labs(
    x = "Proportion", 
    y = "Cluster",
    color = "Card Type",
    title = "Card Proportions by Cluster"
  ) +
  theme_minimal() +
  theme(
    panel.grid.minor = element_blank(),
    legend.position = "right",
    panel.grid.major = element_line(color = "black"),
  )

Code
all_cluster_prop_renamed <- all_cluster_prop_renamed %>%
    mutate(
      name = factor(name, levels = c(
      "meadow", "stream", "bear", 
      "deer", "fox", "wolf", "trout", "bee", 
      "dragonfly", "eagle", "rabbit"
    )))
Code
ggplot(all_cluster_prop_renamed, aes(x = proportion, y = name, color = cluster)) +
  geom_point(
    size = 3,
    position = position_jitter(height = 0.2, seed = 99)
  ) +
  scale_y_discrete(limits = rev) +
  scale_color_manual(values = c("#000000", "#377eb8", "#4daf4a", "#984ea3", "#a65628",
                                "#f781bf")) +
  labs(
    x = "Proportion", 
    y = "Card Type",
    color = "Cluster",
    title = "Card Proportions by Cluster"
  ) +
  theme_minimal() +
  theme(
    panel.grid.minor = element_blank(),
    legend.position = "right",
    panel.grid.major = element_line(color = "black")
  )

Code
ggplot(aes(x = cluster,
           y = proportion,
           fill = factor(num_exact, levels = rev(0:7))),
         data = all_exact
        ) +
  geom_bar(position = "fill", stat = "identity") +
  scale_fill_viridis_d() +
  facet_wrap(vars(card)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(x="Clusters",
       fill="Exact number of cards",
       title="Exact number of cards in each cluster")

Code
ggplot(all_exact, aes(x = num_exact, y = proportion, color = cluster)) +
  geom_line(size = 0.5) +
  facet_wrap(~ card) +
  scale_x_continuous(breaks = 0:7) +
  labs(x = "Exact Number of Cards",
       y = "Proportion",
       color = "Cluster",
       title = "Exact number of cards in each cluster") +
  theme_minimal() +
  theme(legend.position = "bottom",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.text = element_text(size = 10, face = "bold"),
        panel.spacing = unit(1, "lines"))

Code
ggplot(all_exact_renamed, aes(x = num_exact, y = proportion, color = cluster)) +
  geom_point() +
  geom_line(size = 0.5) +
  facet_wrap(~ card) +
  scale_x_continuous(breaks = 0:7) +
  scale_color_manual(values = c("#000000", "#377eb8", "#4daf4a", "#984ea3", "#a65628",
                                "#f781bf")) +
  labs(x = "Exact Number of Cards",
       y = "Proportion",
       color = "Cluster",
       title = "Exact number of cards in each cluster") +
  theme_minimal() +
  theme(legend.position = "bottom",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.text = element_text(size = 10, face = "bold"),
        panel.spacing = unit(1, "lines"))

Code
db_individual_removed <- read.csv(here::here("db_individual_lowest_removed-CORRECTED.csv"))
Code
cluster_merge <- merge(x = cluster_data, y = db_individual_removed, by = "ID")
Code
cluster_merge_renamed <- cluster_merge %>%
  mutate(
      cluster = factor(X6cluster, levels = c(
      "Cluster_1", "Cluster_2", "Cluster_3",
      "Cluster_4", "Cluster_5", "Cluster_6"
    ))) %>%
    mutate(X6cluster = fct_recode(X6cluster,
      "Bear Trout Bee" = "Cluster_1",
      "Stream Dragonfly" = "Cluster_2",
      "Meadow Bee" = "Cluster_3",
      "Eagle Rabbit" = "Cluster_4",
      "Deer Fox" = "Cluster_5",
      "Elevated Trout" = "Cluster_6"
    ))
Code
ggplot(cluster_merge_renamed |>
         mutate(cluster = factor(X6cluster)),
       aes(x = score, color = cluster, linetype = cluster)) +
  # geom_density(size = 1.2) +
    stat_density(size = 1.1, geom="line",position="identity") +
  labs(x = "Score", y = "Density", color = "Cluster", linetype = "Cluster", title="Score distribution by cluster") +
  scale_color_manual(values = c("#000000", "#377eb8", "#4daf4a", "#984ea3", "#a65628",
                                "#f781bf")) +
  theme_minimal()

Code
results_table
# A tibble: 6 × 7
  X6cluster        mean_score sd_score p25th median p75th count
  <fct>                 <dbl>    <dbl> <dbl>  <dbl> <dbl> <int>
1 Meadow Bee             67.8     5.35    64     68    72  7315
2 Eagle Rabbit           66.6     5.18    62     66    70  5852
3 Elevated Trout         66.4     4.82    63     66    70  4265
4 Bear Trout Bee         65.4     4.73    62     65    69  5560
5 Stream Dragonfly       64.1     4.08    61     64    67  6448
6 Deer Fox               63.5     3.91    60     63    66  7268
Code
cluster_pool <- cluster_merge_renamed %>%
  group_by(`X6cluster`, pool) %>%
  summarize(count = n()) %>%
  mutate(prop = count / sum(count))
cluster_pool
# A tibble: 24 × 4
# Groups:   X6cluster [6]
   X6cluster        pool             count   prop
   <fct>            <chr>            <int>  <dbl>
 1 Bear Trout Bee   bee_meadow         541 0.0973
 2 Bear Trout Bee   default           3551 0.639 
 3 Bear Trout Bee   dragonfly_stream   264 0.0475
 4 Bear Trout Bee   low_eagle_rabbit  1204 0.217 
 5 Stream Dragonfly bee_meadow         186 0.0288
 6 Stream Dragonfly default           2718 0.422 
 7 Stream Dragonfly dragonfly_stream  2647 0.411 
 8 Stream Dragonfly low_eagle_rabbit   897 0.139 
 9 Meadow Bee       bee_meadow        3132 0.428 
10 Meadow Bee       default           2834 0.387 
# ℹ 14 more rows
Code
cluster_pool_renamed <- cluster_pool %>%
    mutate(
      pool = factor(pool, levels = c(
      "default", "bee_meadow", "dragonfly_stream", 
      "low_eagle_rabbit"
    ))) %>%
    mutate(pool = fct_recode(pool,
      "Default Pool" = "default",
      "Meadow Bee Pool" = "bee_meadow",
      "Stream Dragonfly Pool" = "dragonfly_stream",
      "Low Eagle Rabbit Pool" = "low_eagle_rabbit"
    ))
Code
ggplot(aes(x = pool, 
           y = prop, 
           fill = pool), 
       data=cluster_pool_renamed) + 
  geom_bar(stat = "identity") +
  labs(x = "Pool Sampled", y="Proportion", fill="Pool Sampled", title="Pool sampled in each cluster") +
  theme_minimal() +
  theme(panel.grid.major = element_line(color = "black"),
        panel.grid.minor = element_line(color = "black"),
        panel.grid.major.x = element_blank(),  # Remove vertical major grid lines
        panel.grid.minor.x = element_blank(),
        axis.text.x = element_blank(),  # Removes text
        axis.ticks.x = element_blank()) +
  facet_wrap(vars(X6cluster))

Code
cluster_div <- cluster_merge_renamed %>%
  group_by(`X6cluster`, dv_score) %>%
  summarize(count = n()) %>%
  mutate(prop = count / sum(count)) %>% 
  arrange(desc(dv_score))
cluster_div
# A tibble: 26 × 4
# Groups:   X6cluster [6]
   X6cluster        dv_score count   prop
   <fct>               <int> <int>  <dbl>
 1 Bear Trout Bee         12  4570 0.822 
 2 Stream Dragonfly       12  4711 0.731 
 3 Meadow Bee             12  4727 0.646 
 4 Eagle Rabbit           12  5339 0.912 
 5 Deer Fox               12  6691 0.921 
 6 Elevated Trout         12  3387 0.794 
 7 Bear Trout Bee          7   839 0.151 
 8 Stream Dragonfly        7  1412 0.219 
 9 Meadow Bee              7  1946 0.266 
10 Eagle Rabbit            7   453 0.0774
# ℹ 16 more rows
Code
ggplot(aes(x = dv_score, y = prop, color = X6cluster, linetype = X6cluster), data=cluster_div) +
  geom_line(size = 1.1) +
  labs(x = "Score", y = "Density", color = "Cluster", linetype = "Cluster", title="Diversity Score distribution by cluster") +
  scale_color_manual(values = c("#000000", "#377eb8", "#4daf4a", "#984ea3", "#a65628",
                                "#f781bf")) +
  scale_x_continuous(
    breaks = c(-5, 0, 3, 7, 12)) +
  theme_minimal()

Code
cluster_merge_renamed %>%
  filter(X6cluster == "Meadow Bee") %>%
  group_by(dv_score) %>%
  summarize(count = n()) %>%
  arrange(desc(dv_score))
# A tibble: 5 × 2
  dv_score count
     <int> <int>
1       12  4727
2        7  1946
3        3   562
4        0    78
5       -5     2
Code
cluster_merge_renamed %>%
  filter(X6cluster == "Stream Dragonfly") %>%
  group_by(dv_score) %>%
  summarize(count = n()) %>%
  arrange(desc(dv_score))
# A tibble: 5 × 2
  dv_score count
     <int> <int>
1       12  4711
2        7  1412
3        3   290
4        0    33
5       -5     2
Code
cluster_merge_renamed %>%
  filter(X6cluster == "Bear Trout Bee") %>%
  group_by(dv_score) %>%
  summarize(count = n()) %>%
  arrange(desc(dv_score))
# A tibble: 4 × 2
  dv_score count
     <int> <int>
1       12  4570
2        7   839
3        3   141
4        0    10
Code
cluster_merge_renamed %>%
  filter(X6cluster == "Eagle Rabbit") %>%
  group_by(dv_score) %>%
  summarize(count = n()) %>%
  arrange(desc(dv_score))
# A tibble: 4 × 2
  dv_score count
     <int> <int>
1       12  5339
2        7   453
3        3    55
4        0     5
Code
cluster_merge_renamed %>%
  filter(X6cluster == "Deer Fox") %>%
  group_by(dv_score) %>%
  summarize(count = n()) %>%
  arrange(desc(dv_score))
# A tibble: 4 × 2
  dv_score count
     <int> <int>
1       12  6691
2        7   544
3        3    32
4        0     1
Code
cluster_merge_renamed %>%
  filter(X6cluster == "Elevated Trout") %>%
  group_by(dv_score) %>%
  summarize(count = n()) %>%
  arrange(desc(dv_score))
# A tibble: 4 × 2
  dv_score count
     <int> <int>
1       12  3387
2        7   754
3        3   121
4        0     3
Code
cluster_share <- cluster_merge_renamed %>%
  group_by(`X6cluster`) %>%
  summarize(meadow = mean(meadow_score),
            stream = mean(stream_score),
            bear = mean(bear_score),
            deer = mean(deer_score),
            fox = mean(fox_score),
            wolf = mean(wolves_score),
            trout = mean(trout_score),
            bee = mean(bee_score),
            dragonfly = mean(dragonfly_score),
            eagle = mean(eagle_score),
            rabbit = mean(rabbit_score),
            diversity = mean(dv_score),
            total = mean(score)
            ) %>%
  arrange(desc(total))
cluster_share
# A tibble: 6 × 14
  X6cluster    meadow stream  bear  deer   fox  wolf trout   bee dragonfly eagle
  <fct>         <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>     <dbl> <dbl>
1 Meadow Bee    14.5    2.25  4.61  6.01  4.56  1.48  4.05 14.5       2.22  2.93
2 Eagle Rabbit   6.21   2.32  5.20  6.42  4.66  1.47  5.21  5.23      2.44 13.6 
3 Elevated Tr…   6.19   1.83  4.91  6.81  5.13  1.84 12.6   5.71      1.42  8.22
4 Bear Trout …   5.44   2.25 13.0   6.14  5.13  1.64  5.92  7.53      2.23  4.29
5 Stream Drag…   5.10   5.10  4.14  6.01  4.39  1.28  7.21  4.73     11.3   3.64
6 Deer Fox       6.76   2.50  4.75 10.9   7.26  1.86  4.54  6.15      2.85  3.37
# ℹ 3 more variables: rabbit <dbl>, diversity <dbl>, total <dbl>

Heat Maps

Code
cluster_position_renamed <- cluster_position_renamed %>%
  mutate(
      card = factor(card, levels = c(
      "Meadow", "Stream", "Bear", 
      "Deer", "Fox", "Wolf", "Trout", "Bee", 
      "Dragonfly", "Eagle", "Rabbit"
    )))
Code
ggplot(aes(x=col, y=row, fill=proportion), data=(overall_position)) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837", limits = c(0, NA)) +
  facet_wrap(vars(card)) +
  labs(title="Positions across all 50,000", x = "Row", y = "Column", fill = "Proportion") +
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.ticks.length = unit(0, "pt"),
    panel.grid = element_blank() 
  )

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% 
                                                   filter(cluster == "Bear Trout Bee"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837", limits = c(0, NA)) +
  facet_wrap(vars(card)) +
  labs(title="Bear Trout Bee cluster", x = "Row", y = "Column", fill = "Proportion") +
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.ticks.length = unit(0, "pt"),
    panel.grid = element_blank() 
  )

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% 
                                                   filter(cluster == "Bear Trout Bee",
                                                          card %in% c("Meadow", "Stream", "Dragonfly", "Eagle")))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837", limits = c(0, NA)) +
  facet_wrap(vars(card)) +
  labs(title="Bear Trout Bee cluster", x = "Row", y = "Column", fill = "Proportion") +
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.ticks.length = unit(0, "pt"),
    panel.grid = element_blank() 
  )

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(cluster == "Stream Dragonfly"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837", limits = c(0, NA)) +
  facet_wrap(vars(card)) +
  labs(title="Stream Dragonfly cluster")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% 
                                                   filter(cluster == "Stream Dragonfly",
                                                          card %in% c("Meadow", "Stream", "Bear", "Bee")))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837", limits = c(0, NA)) +
  facet_wrap(vars(card)) +
  labs(title="Stream Dragonfly cluster", x = "Row", y = "Column", fill = "Proportion") +
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.ticks.length = unit(0, "pt"),
    panel.grid = element_blank() 
  )

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(cluster == "Meadow Bee"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(card)) +
  labs(title="Meadow Bee cluster")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% 
                                                   filter(cluster == "Meadow Bee",
                                                          card %in% c("Meadow", "Stream", "Trout", "Bee")))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837", limits = c(0, NA)) +
  facet_wrap(vars(card)) +
  labs(title="Meadow Bee cluster", x = "Row", y = "Column", fill = "Proportion") +
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.ticks.length = unit(0, "pt"),
    panel.grid = element_blank() 
  )

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(cluster == "Eagle Rabbit"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(card)) +
  labs(title="Eagle Rabbit cluster")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% 
                                                   filter(cluster == "Eagle Rabbit",
                                                          card %in% c("Meadow", "Eagle")))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837", limit = c(0,NA)) +
  facet_wrap(vars(card)) +
  labs(title="Eagle Rabbit cluster", x = "Row", y = "Column", fill = "Proportion") +
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.ticks.length = unit(0, "pt"),
    panel.grid = element_blank() 
  ) +
  coord_fixed(ratio = 0.8)

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(cluster == "Deer Fox"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(card)) +
  labs(title="Deer Fox cluster")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% 
                                                   filter(cluster == "Deer Fox"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(card)) +
  labs(title="Deer Fox cluster")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(cluster == "Elevated Trout"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(card)) +
  labs(title="Elevated Trout cluster")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Bear"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Bear positions across clusters")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Bee"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Bee positions across clusters")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Meadow"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Meadow positions across clusters")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Trout"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Trout positions across clusters")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Eagle"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Eagle positions across clusters")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Rabbit"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Rabbit positions across clusters")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Dragonfly"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Dragonfly positions across clusters")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Fox"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Fox positions across clusters")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Deer"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Deer positions across clusters")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Stream"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Stream positions across clusters")

Code
ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Wolf"))) +
  geom_tile() +
  theme_minimal() +
  scale_fill_gradient(low="#F0F0F0", high="#006837") +
  facet_wrap(vars(cluster)) +
  labs(title="Wolf positions across clusters")

Unclustered

Code
db_pos = read_csv(here::here("database-CORRECTED-a.csv"))
Code
neighbors = db_pos |>
  filter(score > 57) |>
  select(!pool) |>
  pivot_longer(!c(ID, score), names_to = "position", values_to = "card") |>
  mutate(row = substr(position, start = 4, stop = 4),
         col = substr(position, start = 8, stop = 8),
         row = as.numeric(row),
         col = as.numeric(col))
Code
# neighboring card on right
neighbors = neighbors |>
  left_join(neighbors |>
              mutate(col = col - 1) |>
              rename(right_neighbor = card) |>
              select(ID, row, col, right_neighbor),
            join_by(ID, row, col))

# neighboring card on left
neighbors = neighbors |>
  left_join(neighbors |>
              mutate(col = col + 1) |>
              rename(left_neighbor = card) |>
              select(ID, row, col, left_neighbor),
            join_by(ID, row, col))

# neighboring card up
neighbors = neighbors |>
  left_join(neighbors |>
              mutate(row = row - 1) |>
              rename(up_neighbor = card) |>
              select(ID, row, col, up_neighbor),
            join_by(ID, row, col))

# neighboring card down
neighbors = neighbors |>
  left_join(neighbors |>
              mutate(row = row + 1) |>
              rename(down_neighbor = card) |>
              select(ID, row, col, down_neighbor),
            join_by(ID, row, col))
Code
neighbors |> head(10)
# A tibble: 10 × 10
      ID score position card        row   col right_neighbor left_neighbor
   <dbl> <dbl> <chr>    <chr>     <dbl> <dbl> <chr>          <chr>        
 1     1    68 row1col1 Dragonfly     1     1 Stream         <NA>         
 2     1    68 row1col2 Stream        1     2 Deer           Dragonfly    
 3     1    68 row1col3 Deer          1     3 Rabbit         Stream       
 4     1    68 row1col4 Rabbit        1     4 Fox            Deer         
 5     1    68 row1col5 Fox           1     5 <NA>           Rabbit       
 6     1    68 row2col1 Bear          2     1 Trout          <NA>         
 7     1    68 row2col2 Trout         2     2 Bear           Bear         
 8     1    68 row2col3 Bear          2     3 Deer           Trout        
 9     1    68 row2col4 Deer          2     4 Rabbit         Bear         
10     1    68 row2col5 Rabbit        2     5 <NA>           Deer         
# ℹ 2 more variables: up_neighbor <chr>, down_neighbor <chr>
Code
neighbors_long = neighbors |>
  pivot_longer(!c(ID, score, position, row, col, card),
               names_to = "neighbor",
               values_to = "neighbor_card")
Code
neighbors_long |> head(10)
# A tibble: 10 × 8
      ID score position card        row   col neighbor       neighbor_card
   <dbl> <dbl> <chr>    <chr>     <dbl> <dbl> <chr>          <chr>        
 1     1    68 row1col1 Dragonfly     1     1 right_neighbor Stream       
 2     1    68 row1col1 Dragonfly     1     1 left_neighbor  <NA>         
 3     1    68 row1col1 Dragonfly     1     1 up_neighbor    Bear         
 4     1    68 row1col1 Dragonfly     1     1 down_neighbor  <NA>         
 5     1    68 row1col2 Stream        1     2 right_neighbor Deer         
 6     1    68 row1col2 Stream        1     2 left_neighbor  Dragonfly    
 7     1    68 row1col2 Stream        1     2 up_neighbor    Trout        
 8     1    68 row1col2 Stream        1     2 down_neighbor  <NA>         
 9     1    68 row1col3 Deer          1     3 right_neighbor Rabbit       
10     1    68 row1col3 Deer          1     3 left_neighbor  Stream       
Code
neighbors_sum = neighbors_long |>
  filter(!is.na(neighbor_card)) |>
  group_by(card) |>
  count(neighbor_card) |>
  mutate(proportion = n / sum(n))
Code
ggplot(neighbors_sum,
       aes(x = card,
           y = neighbor_card,
           fill = proportion)) +
  geom_tile() + 
  scale_fill_distiller(palette = "Greens", direction = 1)

Code
neighbors_sum <- neighbors_sum %>%
mutate(
      card = factor(card, levels = c(
      "Meadow", "Stream", "Bear", 
      "Deer", "Fox", "Wolf", "Trout", "Bee", 
      "Dragonfly", "Eagle", "Rabbit"
    ))) %>%
  mutate(
      neighbor_card = factor(neighbor_card, levels = c(
      "Meadow", "Stream", "Bear", 
      "Deer", "Fox", "Wolf", "Trout", "Bee", 
      "Dragonfly", "Eagle", "Rabbit"
    )))
Code
ggplot(neighbors_sum,
       aes(x = card,
           fill = neighbor_card,
           y = proportion)) +
  geom_bar(position = "fill", stat = "identity") +
  scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
                    "#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62"))+
  theme_minimal()

Code
ggplot(neighbors_sum,
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  facet_wrap(vars(card))

Clustered

Code
db_cluster = read_csv(here::here("normalized_clusters-CORRECTED.csv"))
Code
neighbors_cluster = db_cluster |>
  pivot_longer(!c(ID, pool, score, contains("cluster")),
               names_to = "position", values_to = "card") |>
  mutate(row = substr(position, start = 4, stop = 4),
         col = substr(position, start = 8, stop = 8),
         row = as.numeric(row),
         col = as.numeric(col))
Code
# neighboring card on right
neighbors_cluster = neighbors_cluster |>
  left_join(neighbors_cluster |>
              mutate(col = col - 1) |>
              rename(right_neighbor = card) |>
              select(ID, row, col, right_neighbor),
            join_by(ID, row, col))

# neighboring card on left
neighbors_cluster = neighbors_cluster |>
  left_join(neighbors_cluster |>
              mutate(col = col + 1) |>
              rename(left_neighbor = card) |>
              select(ID, row, col, left_neighbor),
            join_by(ID, row, col))

# neighboring card up
neighbors_cluster = neighbors_cluster |>
  left_join(neighbors_cluster |>
              mutate(row = row - 1) |>
              rename(up_neighbor = card) |>
              select(ID, row, col, up_neighbor),
            join_by(ID, row, col))

# neighboring card down
neighbors_cluster = neighbors_cluster |>
  left_join(neighbors_cluster |>
              mutate(row = row + 1) |>
              rename(down_neighbor = card) |>
              select(ID, row, col, down_neighbor),
            join_by(ID, row, col))
Code
neighbors_cluster |> head(10)
# A tibble: 10 × 22
      ID score pool    `7cluster` `3cluster` `4cluster` `5cluster` `6cluster`
   <dbl> <dbl> <chr>   <chr>      <chr>      <chr>      <chr>      <chr>     
 1     1    68 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 2     1    68 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 3     1    68 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 4     1    68 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 5     1    68 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 6     1    68 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 7     1    68 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 8     1    68 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 9     1    68 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
10     1    68 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
# ℹ 14 more variables: `8cluster` <chr>, `9cluster` <chr>, `6cluster_s5` <chr>,
#   `6cluster_s6` <chr>, `8cluster_s5` <chr>, `7cluster_s5` <chr>,
#   position <chr>, card <chr>, row <dbl>, col <dbl>, right_neighbor <chr>,
#   left_neighbor <chr>, up_neighbor <chr>, down_neighbor <chr>
Code
neighbors_cluster_long = neighbors_cluster |>
  pivot_longer(!c(ID, pool, contains("cluster"), score, position, row, col, card),
               names_to = "neighbor",
               values_to = "neighbor_card")
Code
neighbors_cluster_long |> head(10)
# A tibble: 10 × 20
      ID score pool    `7cluster` `3cluster` `4cluster` `5cluster` `6cluster`
   <dbl> <dbl> <chr>   <chr>      <chr>      <chr>      <chr>      <chr>     
 1     1    68 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 2     1    68 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 3     1    68 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 4     1    68 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 5     1    68 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 6     1    68 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 7     1    68 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 8     1    68 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
 9     1    68 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
10     1    68 default Cluster_1  Cluster_1  Cluster_1  Cluster_1  Cluster_1 
# ℹ 12 more variables: `8cluster` <chr>, `9cluster` <chr>, `6cluster_s5` <chr>,
#   `6cluster_s6` <chr>, `8cluster_s5` <chr>, `7cluster_s5` <chr>,
#   position <chr>, card <chr>, row <dbl>, col <dbl>, neighbor <chr>,
#   neighbor_card <chr>

Additional Clustered neighbor

Code
neighbors_sum2 = neighbors_cluster_long |>
  filter(!is.na(neighbor_card)) |>
  group_by(card, `6cluster`) |>
  count(neighbor_card) |>
  mutate(proportion = n / sum(n))
Code
neighbors_sum2_renamed <- neighbors_sum2 %>%
    mutate(
      `6cluster` = factor(`6cluster`, levels = c(
      "Cluster_1", "Cluster_2", "Cluster_3", 
      "Cluster_4", "Cluster_5", "Cluster_6"
    ))) %>%
    mutate(`6cluster` = fct_recode(`6cluster`,
      "Bear Trout Bee" = "Cluster_1",
      "Stream Dragonfly" = "Cluster_2",
      "Meadow Bee" = "Cluster_3",
      "Eagle Rabbit" = "Cluster_4",
      "Deer Fox" = "Cluster_5",
      "Elevated Trout" = "Cluster_6"
    )) %>%
  mutate(
      neighbor_card = factor(neighbor_card, levels = c(
      "Meadow", "Stream", "Bear", 
      "Deer", "Fox", "Wolf", "Trout", "Bee", 
      "Dragonfly", "Eagle", "Rabbit" 
    ))) %>%
  mutate(
      card = factor(card, levels = c(
      "Meadow", "Stream", "Bear", 
      "Deer", "Fox", "Wolf", "Trout", "Bee", 
      "Dragonfly", "Eagle", "Rabbit" 
    )))
Code
ggplot(neighbors_sum2,
       aes(x = `6cluster`,
           fill = neighbor_card,
           y = proportion)) +
  geom_bar(position = "fill", stat = "identity") +
  scale_fill_viridis_d() +
  facet_wrap(vars(card)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8))

Code
ggplot(neighbors_sum2_renamed,
       aes(x = `6cluster`,
           fill = neighbor_card,
           y = proportion)) +
  geom_bar(position = "fill", stat = "identity") +
  scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
                    "#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
  facet_wrap(vars(card)) +
  theme(axis.text.x = element_text(angle = 72, hjust = 1, size = 8)) +
  labs(title = "Propotion of neighbor cards by cluster for each card type")

Code
ggplot(neighbors_sum2,
       aes(x = card,
           y = neighbor_card,
           fill = proportion)) +
  geom_tile() + 
  scale_fill_distiller(palette = "Greens", direction = 1) +
  facet_wrap(vars(`6cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8))

Code
ggplot(neighbors_sum2 %>% filter(card == "Bear"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`6cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Bear neighbors in each cluster")

Code
ggplot(neighbors_sum2 %>% filter(card == "Bee"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`6cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Bee neighbors in each cluster")

Code
ggplot(neighbors_sum2 %>% filter(card == "Meadow"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`6cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Meadow neighbors in each cluster")

Code
ggplot(neighbors_sum2 %>% filter(card == "Trout"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`6cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Trout neighbors in each cluster")

Code
ggplot(neighbors_sum2 %>% filter(card == "Eagle"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`6cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Eagle neighbors in each cluster")

Code
ggplot(neighbors_sum2 %>% filter(card == "Rabbit"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`6cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Rabbit neighbors in each cluster")

Code
ggplot(neighbors_sum2 %>% filter(card == "Dragonfly"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`6cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Dragonfly neighbors in each cluster")

Code
ggplot(neighbors_sum2 %>% filter(card == "Fox"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`6cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Fox neighbors in each cluster")

Code
ggplot(neighbors_sum2 %>% filter(card == "Deer"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`6cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Deer neighbors in each cluster")

Code
ggplot(neighbors_sum2 %>% filter(card == "Stream"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`6cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Stream neighbors in each cluster")

Code
ggplot(neighbors_sum2 %>% filter(card == "Wolf"),
       aes(x = neighbor_card,
           y = proportion,
           fill = neighbor_card)) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d() +
  theme_minimal() +
  facet_wrap(vars(`6cluster`)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  labs(title="Wolf neighbors in each cluster")

Code
db_individual_removed <- read.csv(here::here("db_individual_lowest_removed-CORRECTED.csv"))
Code
cluster_merge <- merge(x = cluster_data, y = db_individual_removed, by = "ID")
Code
ggplot(aes(x = score), data = cluster_merge) +
  geom_histogram(binwidth = 5, fill = "green3") +
  theme_minimal() +
  facet_wrap(vars(X6cluster)) +
  labs(title="Score distribution by cluster")

Code
ggplot(cluster_merge, aes(x = score, color = factor(X7cluster))) +
  geom_density(size = 1.2) +
  labs(x = "Score", y = "Density", color = "Cluster", title="Score distribution by cluster") +
  theme_minimal()

Code
low_score = database_new %>% anti_join(cluster_merge, by = "ID")
Code
ggplot(cluster_merge, aes(x = score, color = factor(X6cluster))) +
  geom_density(size = 1.2, key_glyph = "path") +
  geom_density(aes(x=score, color = "Unclustered"), size = 1.2, data=low_score, key_glyph = "path") +
  geom_density(aes(x=score, color = "All grids"), size = 1.2, data=database_new, key_glyph = "path") +
  labs(x = "Score", y = "Density", color = "Cluster", title="Score distribution by cluster") +
  theme_minimal()

Code
ggplot(database_new, aes(x = score, color = pool)) +
  geom_density(size = 1.2, key_glyph = "path") +
  labs(x = "Score", y = "Density", color = "Pool Sampled", title="Score distribution by pool") +
  theme_minimal()

Code
database_start <- read.csv(here::here("database-CORRECTED-s.csv"))
Code
database_start_renamed <- database_start %>%
    mutate(
      pool = factor(pool, levels = c(
      "default", "bee_meadow", "dragonfly_stream", 
      "low_eagle_rabbit"
    ))) %>%
    mutate(pool = fct_recode(pool,
      "Default Pool" = "default",
      "Meadow Bee Pool" = "bee_meadow",
      "Stream Dragonfly Pool" = "dragonfly_stream",
      "Low Eagle Rabbit Pool" = "low_eagle_rabbit"
    ))
Code
database_new_renamed <- database_new %>%
    mutate(
      pool = factor(pool, levels = c(
      "default", "bee_meadow", "dragonfly_stream", 
      "low_eagle_rabbit"
    ))) %>%
    mutate(pool = fct_recode(pool,
      "Default Pool" = "default",
      "Meadow Bee Pool" = "bee_meadow",
      "Stream Dragonfly Pool" = "dragonfly_stream",
      "Low Eagle Rabbit Pool" = "low_eagle_rabbit"
    ))
Code
ggplot(database_start_renamed, aes(x = start_score, color = pool)) +
  geom_density(aes(linetype = factor(pool)), size = 1.2, key_glyph = "path", show.legend = FALSE) +
  stat_density(data=database_new_renamed, aes(x = score, color = pool), size = 1.1,
               geom="line",position="identity") +
  scale_linetype_manual(values = c("dashed", "dotted", "dotdash", "longdash", "twodash")) +
  labs(x = "Score", y = "Density", color = "Pool Sampled", title="Start Score distribution by pool") +
  theme_minimal()

Code
# write.csv(cluster_merge, here::here("cluster_merge.csv"), row.names = FALSE)
Code
# write.csv(neighbors_cluster_long, here::here("neighbor-cluster.csv"), row.names = FALSE)

Testings

Code
two_player <- list(c(37,4,2,2), c(45,3,3,3))
four_player <- list(c(37,5,1,2), c(45,2,4,3), c(60,0,0,0), c(47,4,3,4))
five_player <- list(c(37,3,0,2), c(45,2,4,3), c(60,3,0,0), c(47,2,0,4), c(38,1,4,4))
Code
mp_score(two_player)
[1] 37 45
[1] 45 50
[1] 53 62
[1] 65 69
Code
mp_score(four_player)
[1] 37 45 60 47
[1] 45 45 60 52
[1] 49 57 60 60
[1] 61 60 72 63
Code
mp_score(five_player)
[1] 37 45 60 47 38
[1] 45 45 68 47 38
[1] 45 57 68 47 50
[1] 52 60 80 42 50
Code
z <- score_grid(sample_grid)
z
[1] 29  0  0  6
Code
z2 <- score_grid(sample_grid2)
z2
[1] 36  0  0  7
Code
z3 <- score_grid(big_grid1)
z3
[1] 43  3  1  0
Code
set.seed(48)
test_grid <- generate_grid(cards)
test_grid
     [,1]        [,2]        [,3]     [,4]     [,5]    
[1,] "Dragonfly" "Dragonfly" "Stream" "Meadow" "Fox"   
[2,] "Bear"      "Trout"     "Meadow" "Fox"    "Eagle" 
[3,] "Trout"     "Rabbit"    "Stream" "Bee"    "Stream"
[4,] "Wolf"      "Rabbit"    "Deer"   "Meadow" "Bee"   
Code
x <- find_cardinals(0,3,test_grid)
x
[[1]]
[1] 1 3

[[2]]
[1] 0 4

[[3]]
[1] 0 2
Code
c <- sample(x, 1)
c
[[1]]
[1] 1 3
Code
"el"
[1] "el"
Code
c[[1]][1]
[1] 1
Code
score_grid(test_grid)
[1] 25  1  1  2
Code
solo_score(score_grid(test_grid))
[1] 39
Code
set.seed(48)
baseline_scores <- baseline_sim(cards)
Code
mean(baseline_scores)
[1] 28.3123
Code
sd(baseline_scores)
[1] 9.268601
Code
var(baseline_scores)
[1] 85.90696
Code
max(baseline_scores)
[1] 58
Code
min(baseline_scores)
[1] 2
Code
summary(baseline_scores)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   2.00   22.00   28.00   28.31   35.00   58.00 
Code
baseline_data <- data.frame(baseline_scores)
ggplot(aes(x = baseline_scores), data = baseline_data) +
  geom_histogram(binwidth = 5, fill = "steelblue")

Code
set.seed(4)

slide_grid <- matrix(c("Stream", "Dragonfly", "Wolf", "Meadow", "Meadow", "Stream", "Trout", "Bear", "Bee", "Meadow",
                      "Stream", "Stream", "Deer", "Eagle", "Wolf", "Stream", "Dragonfly", "Fox", "Deer",
                      "Rabbit"),4,5,byrow=TRUE)

run1 <- rw_mcmc(slide_grid, 1000, "annealing dynamic", 0.9, 200, record_board = TRUE)

completed_slide <-  matrix(run1[1:20], 4,5,byrow=TRUE)

slide_grid
     [,1]     [,2]        [,3]   [,4]     [,5]    
[1,] "Stream" "Dragonfly" "Wolf" "Meadow" "Meadow"
[2,] "Stream" "Trout"     "Bear" "Bee"    "Meadow"
[3,] "Stream" "Stream"    "Deer" "Eagle"  "Wolf"  
[4,] "Stream" "Dragonfly" "Fox"  "Deer"   "Rabbit"
Code
solo_score(score_grid(slide_grid))
[1] 65
Code
completed_slide
     [,1]        [,2]        [,3]    [,4]     [,5]    
[1,] "Stream"    "Dragonfly" "Wolf"  "Meadow" "Meadow"
[2,] "Stream"    "Trout"     "Bear"  "Bee"    "Meadow"
[3,] "Stream"    "Stream"    "Eagle" "Rabbit" "Deer"  
[4,] "Dragonfly" "Stream"    "Fox"   "Deer"   "Wolf"  
Code
solo_score(score_grid(completed_slide))
[1] 67
Code
slide_grid_op <- matrix(c("Stream", "Dragonfly", "Wolf", "Meadow", "Meadow", "Stream", "Trout", "Bear", "Bee", "Meadow",
                      "Stream", "Stream", "Eagle", "Rabbit", "Deer", "Stream", "Dragonfly", "Fox", "Deer",
                      "Wolf"),4,5,byrow=TRUE)
slide_grid_op
     [,1]     [,2]        [,3]    [,4]     [,5]    
[1,] "Stream" "Dragonfly" "Wolf"  "Meadow" "Meadow"
[2,] "Stream" "Trout"     "Bear"  "Bee"    "Meadow"
[3,] "Stream" "Stream"    "Eagle" "Rabbit" "Deer"  
[4,] "Stream" "Dragonfly" "Fox"   "Deer"   "Wolf"  
Code
solo_score(score_grid(slide_grid_op))
[1] 67
Code
first_row <- score_grid(matrix(c(t(slide_grid)),nrow=4,ncol=5,byrow=T), individual=TRUE)

slide_score <- data.frame(
  bear_score = c(first_row[1]),
  bee_score = c(first_row[2]),
  meadow_score = c(first_row[3]),
  trout_score = c(first_row[4]),
  eagle_score = c(first_row[5]),
  rabbit_score = c(first_row[6]),
  dragonfly_score = c(first_row[7]),
  fox_score = c(first_row[8]),
  deer_score = c(first_row[9]),
  stream_score = c(first_row[10]),
  wolves_score = c(first_row[11]),
  dv_score = c(first_row[12])
)
slide_score
  bear_score bee_score meadow_score trout_score eagle_score rabbit_score
1          4         6            6           6           2            1
  dragonfly_score fox_score deer_score stream_score wolves_score dv_score
1              10         3          8            5            2       12
Code
# stream dragonfly trout
(choose(20, 7)*choose(8, 3)*choose(10, 3)*choose(92, 7))/choose(130, 20)
[1] 2.726866e-05
Code
(choose(20, 5)*choose(8, 2)*choose(10, 2)*choose(92, 11))/choose(130, 20)
[1] 0.006274223
Code
# stream dragonfly
(choose(20, 9)*choose(8, 4)*choose(102, 7))/choose(130, 20)
[1] 1.297323e-06
Code
(choose(20, 7)*choose(8, 3)*choose(102, 10))/choose(130, 20)
[1] 0.0005525201
Code
# fox deer/wolf deer
(choose(12, 4)*choose(12, 3)*choose(106, 13))/choose(130, 20)
[1] 0.01035475
Code
(choose(12, 3)*choose(12, 2)*choose(106, 15))/choose(130, 20)
[1] 0.05625095
Code
# bee meadow
(choose(20, 9)*choose(8, 4)*choose(102, 7))/choose(130, 20)
[1] 1.297323e-06
Code
(choose(20, 5)*choose(8, 2)*choose(102, 13))/choose(130, 20)
[1] 0.02426065
Code
# bear bee trout
(choose(12, 4)*choose(8, 3)*choose(10, 4)*choose(100, 9))/choose(130, 20)
[1] 6.61645e-05
Code
(choose(12, 3)*choose(8, 1)*choose(10, 2)*choose(100, 14))/choose(130, 20)
[1] 0.02091068
Code
# eagle rabbit 
(choose(8, 3)*choose(8, 3)*choose(10, 3)*choose(104, 11))/choose(130, 20)
[1] 0.0005015318
Code
(choose(8, 2)*choose(8, 2)*choose(10, 1)*choose(104, 15))/choose(130, 20)
[1] 0.02234951
Code
xvy <- cluster_merge %>% filter(X7cluster == "Cluster_4")
Code
# fps2 <- read.csv(here::here("final-parameters-seed2.csv"))
Code
tuned_params <- read.csv(here::here("tuned-parameters.csv"))

multivariate hypergeometric function

Code
multivariate_hypergeometric <- function(desired_counts, deck_counts, total_draw, total_deck = NULL) {
  # Input validation
  if (length(desired_counts) != length(deck_counts)) {
    stop("Length of desired_counts must match deck_counts")
  }
  if (any(desired_counts < 0) || any(deck_counts < 0)) {
    stop("Counts cannot be negative")
  }
  if (total_draw <= 0) {
    stop("Total draw must be positive")
  }
  
  # Calculate total deck size and other cards
  if (is.null(total_deck)) {
    total_deck <- sum(deck_counts)
    warning("Assuming total deck size is sum of provided deck_counts (", total_deck, 
            "). Specify total_deck if different.")
  }
  
  other_cards <- total_deck - sum(deck_counts)
  if (other_cards < 0) {
    stop("Sum of deck_counts exceeds total_deck")
  }
  
  # Check if draw is possible
  if (total_draw > total_deck) {
    stop("Cannot draw more cards than exist in deck")
  }
  if (sum(desired_counts) > total_draw) {
    return(0)  # Impossible to get all desired counts
  }
  
  # Initialize probability
  total_prob <- 0
  
  # Recursive helper function
  generate_combinations <- function(current, remaining_draw, remaining_types, prob_so_far) {
    if (length(current) == length(desired_counts)) {
      # Base case: all types assigned
      remaining_cards <- remaining_draw
      if (remaining_cards >= 0) {
        # Calculate final probability for this combination
        final_prob <- prob_so_far * 
          choose(other_cards, remaining_cards) / 
          choose(total_deck, total_draw)
        return(final_prob)
      } else {
        return(0)
      }
    }
    
    # Recursive case: assign counts for current type
    type_index <- length(current) + 1
    min_count <- desired_counts[type_index]
    max_count <- min(deck_counts[type_index], remaining_draw)
    
    prob_sum <- 0
    for (count in min_count:max_count) {
      new_current <- c(current, count)
      new_remaining <- remaining_draw - count
      new_prob <- prob_so_far * choose(deck_counts[type_index], count)
      prob_sum <- prob_sum + 
        generate_combinations(new_current, new_remaining, remaining_types, new_prob)
    }
    return(prob_sum)
  }
  
  # Start recursive calculation
  total_prob <- generate_combinations(
    current = numeric(0),
    remaining_draw = total_draw,
    remaining_types = length(desired_counts),
    prob_so_far = 1
  )
  
  return(total_prob)
}
Code
prob <- multivariate_hypergeometric(
  desired_counts = c(2, 5),    # Minimums for bees and meadows
  deck_counts = c(8, 20),      # Counts in deck for bees and meadows
  total_draw = 20,             # Drawing 20 cards
  total_deck = 130             # Total cards in deck
)
prob
[1] 0.04757679
Code
# bear bee trout minimum
prob <- multivariate_hypergeometric(
  desired_counts = c(3, 2, 2),
  deck_counts = c(12, 8, 10),
  total_draw = 20,
  total_deck = 130
)
prob
[1] 0.03272014
Code
# bear bee trout maximum
prob <- multivariate_hypergeometric(
  desired_counts = c(4, 3, 3),
  deck_counts = c(12, 8, 10),
  total_draw = 20,
  total_deck = 130
)
prob
[1] 0.0005701113
Code
# deer fox wolf min
prob <- multivariate_hypergeometric(
  desired_counts = c(3, 2, 1),
  deck_counts = c(12, 12, 12),
  total_draw = 20,
  total_deck = 130
)
prob
[1] 0.1203596
Code
# deer fox wolf max
prob <- multivariate_hypergeometric(
  desired_counts = c(4, 4, 2),
  deck_counts = c(12, 12, 12),
  total_draw = 20,
  total_deck = 130
)
prob
[1] 0.001886164
Code
# bee meadow min
prob <- multivariate_hypergeometric(
  desired_counts = c(2, 5),    # Minimums for bees and meadows
  deck_counts = c(8, 20),      # Counts in deck for bees and meadows
  total_draw = 20,             # Drawing 20 cards
  total_deck = 130             # Total cards in deck
)
prob
[1] 0.04757679
Code
# bee meadow max
prob <- multivariate_hypergeometric(
  desired_counts = c(3, 8),    # Minimums for bees and meadows
  deck_counts = c(8, 20),      # Counts in deck for bees and meadows
  total_draw = 20,             # Drawing 20 cards
  total_deck = 130             # Total cards in deck
)
prob
[1] 0.0001239406
Code
# eagle rabbit trout min
prob <- multivariate_hypergeometric(
  desired_counts = c(2, 2, 1),    
  deck_counts = c(8, 8, 10),      
  total_draw = 20,             
  total_deck = 130             
)
prob
[1] 0.0902812
Code
# eagle rabbit trout max
prob <- multivariate_hypergeometric(
  desired_counts = c(3, 3, 2),    
  deck_counts = c(8, 8, 10),      
  total_draw = 20,             
  total_deck = 130             
)
prob
[1] 0.002838628
Code
# stream dragonfly trout min
prob <- multivariate_hypergeometric(
  desired_counts = c(5, 2, 1),    
  deck_counts = c(20, 8, 10),      
  total_draw = 20,             
  total_deck = 130             
)
prob
[1] 0.03519305
Code
# stream dragonfly trout max
prob <- multivariate_hypergeometric(
  desired_counts = c(7, 3, 2),    
  deck_counts = c(20, 8, 10),      
  total_draw = 20,             
  total_deck = 130             
)
prob
[1] 0.0001824627
Code
# trout min
prob <- multivariate_hypergeometric(
  desired_counts = c(3),    
  deck_counts = c(10),      
  total_draw = 20,             
  total_deck = 130             
)
prob
[1] 0.1831558
Code
# trout max
prob <- multivariate_hypergeometric(
  desired_counts = c(4),    
  deck_counts = c(10),      
  total_draw = 20,             
  total_deck = 130             
)
prob
[1] 0.04698214
Code
# stream dragonfly min
prob <- multivariate_hypergeometric(
  desired_counts = c(5, 2),    
  deck_counts = c(20, 8),      
  total_draw = 20,             
  total_deck = 130             
)
prob
[1] 0.04757679
Code
# stream dragonfly max
prob <- multivariate_hypergeometric(
  desired_counts = c(7, 3),    
  deck_counts = c(20, 8),      
  total_draw = 20,             
  total_deck = 130             
)
prob
[1] 0.0007566569
Code
1-phyper(3, 12, 118, 20)
[1] 0.08923247